1 Raw data

data <- read.csv("/home/bambrozi/2_CORTISOL/Data/T4_Elora_Data_04_25_2024.csv")

# Replace "treated" with NA
data$T4Cortisol[data$T4Cortisol == "treated" | data$T4Cortisol == "Treated at T2" | data$T4Cortisol == "treated at T2"] <- NA
# Convert the column to numeric, coercing non-numeric values to NA
data$T4Cortisol <- as.numeric(as.character(data$T4Cortisol))
#Filtering only the lines with values
data <- data[!is.na(data$T4Cortisol),]
#creating new data file cleaned  
write.csv(data, "/home/bambrozi/2_CORTISOL/Data/data_clean.csv", row.names = F)

print(data)

2 Continuous Phenotype

# Summary Statistics
summary(data$T4Cortisol)
# Histogram
hist(data$T4Cortisol, breaks = 20, main = "Histogram of T4 Cortisol", xlab = "T4 Cortisol")
# Boxplot
boxplot(data$T4Cortisol, main = "Boxplot of T4 Cortisol", ylab = "T4 Cortisol")
# Density Plot
plot(density(data$T4Cortisol), main = "Density Plot of T4 Cortisol", xlab = "T4 Cortisol", ylab = "Density")
# Calculate the theoretical quantiles
qqnorm(data$T4Cortisol, main = "QQ Plot of T4Cortisol", xlim = c(min(qqnorm(data$T4Cortisol)$x), max(qqnorm(data$T4Cortisol)$x)), ylim = c(min(qqnorm(data$T4Cortisol)$y), max(qqnorm(data$T4Cortisol)$y) + 2 * IQR(qqnorm(data$T4Cortisol)$y)))
# Add the QQ line
qqline(data$T4Cortisol, col = "red")

Summary statistics My Image

Histogram My Image

Density My Image

Box_Plot My Image

qq_Plot My Image

Shapiro-Wilk normality test My Image

2.1 Categorical Phenotype

I received from Umesh a e-mail informing the three categories that the animals could be sorted based on their cortisol concentration.

My Image

data$Categorical <- ifelse(data$T4Cortisol >= 956, "H", 
                           ifelse(data$T4Cortisol <= 190.8, "L", "M"))

table(data$Categorical)
library(ggplot2)

# Reorder the levels of the 'Categorical' column
data$Categorical <- factor(data$Categorical, levels = c("L", "M", "H"))

# Create the histogram with reordered categories
ggplot(data, aes(x = Categorical, fill = Categorical)) +
  geom_bar() +
  labs(title = "Histogram of T4Cortisol by Category",
       x = "Category",
       y = "Frequency") +
  theme_minimal()

# Create the histogram
ggplot(data, aes(x = T4Cortisol, fill = Categorical)) +
  geom_histogram(binwidth = 50, color = "black", alpha = 0.7) + # Adjust binwidth as needed
  labs(title = "Histogram of T4Cortisol with Color by Category",
       x = "T4 Cortisol",
       y = "Frequency",
       fill = "Category") +
  scale_fill_manual(values = c("H" = "red", "M" = "blue", "L" = "green")) + # Adjust colors if needed
  theme_minimal()

# Create the density plot
ggplot(data, aes(x = T4Cortisol, fill = Categorical)) +
  geom_density(alpha = 0.3) +
  labs(title = "Density Plot of T4Cortisol with Color by Category",
       x = "T4Cortisol",
       y = "Density",
       fill = "Category") +
  scale_fill_manual(values = c("H" = "red", "M" = "blue", "L" = "green")) + # Adjust colors if needed
  theme_minimal()

# Create a density plot
ggplot(data, aes(x = T4Cortisol)) +
  geom_density() +
  geom_vline(xintercept = c(956, 190.8), linetype = "dashed", color = "red") +
  labs(title = "Density Plot of T4Cortisol with Vertical Lines",
       x = "T4Cortisol",
       y = "Density") +
  theme_minimal()

The animals were sorted in these three categories >H = Hight >M = Medium >L = Low

My Image

The individuals frequency distribution in theese categories are shown in the plots below

My Image My Image My Image My Image

2.2 Removing “outliers”

Observing the previous plots I tried to remove the “outliers” phenotypes above 1250, but the outcome from Shapiro test is still indicating no normality of the data.

library(tidyverse)

data_no_out <- filter(data, T4Cortisol < 1250)

# Create QQ plot
qqnorm(data_no_out$T4Cortisol, main = "QQ Plot of T4Cortisol", xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
qqline(data$T4Cortisol, col = "red")

boxplot(data_no_out$T4Cortisol, main = "Boxplot of T4 Cortisol", ylab = "T4 Cortisol")

hist(data_no_out$T4Cortisol, breaks = 20, main = "Histogram of T4 Cortisol", xlab = "T4 Cortisol")

shapiro.test(data$T4Cortisol)

My Image My Image My Image My Image

3 GENOTYPES

Lucas Alcântara sent me the path to the genotype and pedigree files: /data/cgil/daiclu/6_Data/database/public_output/bruno

My Image

In this folder we found the following files:

I made a copy of this files in a folder called Raw_files:

/home/bambrozi/2_CORTISOL/RawFiles

This directory has two sub-directories:

4 GWAS

4.1 Checking with Phenotyped Animals also have Genotype

library(data.table)

pheno <- read.csv("/home/bambrozi/2_CORTISOL/Data/data_clean.csv")
ped <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv")
geno <- fread("/home/bambrozi/2_CORTISOL/Geno_files/genoplink.ped")
geno <- geno[,c("V2")]

#Bringing cdn_id to my phenotype file
#Generate a index with the match
matching_indices <- match(pheno$ID, ped$elora_id)
# Then, assign 'cdn_id' from 'ped' to 'pheno' where there are matches
pheno$cdn_id <- ifelse(!is.na(matching_indices), ped$cdn_id[matching_indices], NA)

#Making a phenotype file only with genotyped animals
pheno_genotyped <- pheno[pheno$cdn_id %in% geno$V2,] 

#check if all animals in this file are genotyped
checkk <- pheno_genotyped$cdn_id %in% geno$V2
sum(checkk)

4.2 Generating a Phenotype file

The phenotype file should have three columns: FID, Animal_id, Phenotype

HO <- rep("HO", 252)

pheno_gwas <- as.data.frame(cbind(HO, pheno_genotyped$cdn_id, pheno_genotyped$T4Cortisol))

colnames(pheno_gwas) <- c("FID", "cdn_id", "cortisol")

pheno_gwas$cdn_id <-  as.numeric(pheno_gwas$cdn_id)
pheno_gwas$cortisol <- round(as.numeric(pheno_gwas$cortisol),2)

write.table(pheno_gwas, "/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", quote = F, row.names = F, col.names = T)

4.3 Adjusting the SNP_map to .map

map <- fread("/data/cgil/daiclu/6_Data/database/public_output/bruno/DGVsnpinfo.2404.ho")
morgan <- data.frame(X0 = rep(0, 45101))
mapa=as.data.frame(cbind(map$chr, map$snp_name, morgan$X0, map$location))
head(mapa)
write.table(x = mapa, file = "/home/bambrozi/2_CORTISOL/Geno_files/genoplink.map", row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE)

4.4 Generating the bfiles

system("/home/local/bin/plink --cow --nonfounders --allow-no-sex --keep-allele-order --file /home/bambrozi/2_CORTISOL/Geno_files/genoplink --make-bed --out /home/bambrozi/2_CORTISOL/Geno_files/genoplink")
With the code above I generated the bfiles:
    genoplink.bed
  • genoplink.bim
  • genoplink.fam
  • genoplink.log
  • genoplink.nosex

5 Quality Control

We ran the code below to perfom the QC

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files/genoplink
RESULT=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean

/home/local/bin/plink \
    --bfile ${DATA} \
    --cow \
    --allow-no-sex \
    --hwe 1e-5 \
    --maf 0.01 \
    --geno 0.1 \
    --mind 0.1 \
    --keep-allele-order \
    --make-bed \
    --out ${RESULT}
    

The server screen outcome is shown below. My Image

After the Quality Control we ended up with

6 KING

To check for duplicated individuals I performed the KINSHIP analysis using one script from Larissa Braga. Running the King Analysis on Plink.

#!/bin/bash

DATA=/home/bambrozi/Extrm_ARS1_GrassHill_1/GENOTYPES/ONLY_GRASSHILL_AND_PHENO_after_QC/only_grasshill_and_pheno_clean
RESULT=/home/bambrozi/Extrm_ARS1_GrassHill_1/GENOTYPES/KING/result_king

plink2 \
    --bfile ${DATA} \
    --chr-set 29 \
    --make-king-table \
    --out ${RESULT}

This is the output screen on terminal:

My Image

The table below is the output /home/bambrozi/2_CORTISOL/Geno_files_after_KING/result_king.kin0 and have pairwise comparisons between all individuals.

My Image

Now we should open in R and check for individuals with more than 0,354, to perform this we can use the code below, also provided by Larissa Braga:

setwd("/home/bambrozi/2_CORTISOL/Geno_files_after_KING")

relatedness="result_king.kin0" ## change accordingly!!

library(data.table)

print(relatedness)
rel=fread(relatedness, h = T)
head(rel)

print("Individuals with different identifications above the cut off of 0.354:")
dup=subset(rel, KINSHIP >= 0.354  & IID1!=IID2)
print(dup)
nrow(dup)

So the code above will provide this output if there is any duplicated individual.

My Image

We do not have any duplicated individual

So the file to be used are those in the directory /home/bambrozi/2_CORTISOL/Geno_files_after_QC

files:genoplink_clean

After Quality Control we didn’t lost any animal, so we don’t need to update our phenotype file

7 PCA

Now before performing the PCA analysis we need to change the FID for those individuals that has phenotype = 1 for Nadia.

#!/bin/bash

DATA=/home/bambrozi/Extrm_ARS1_GrassHill_1/PCA/imput_pca
RESULT=/home/bambrozi/Extrm_ARS1_GrassHill_1/PCA/pca_result

plink \
    --bfile ${DATA} \
    --keep-allele-order \
    --chr-set 29 \
    --pca \
    --out ${RESULT}

The PCA output:

My Image

7.1 PCA Plot

After generate the Eigenvalues and Eigenvectors I used the code below to generate the PCA Plot

setwd("/home/bambrozi/2_CORTISOL/PCA")

library(ggplot2) 
library(tidyverse)

##
# Visualize PCA results
###

# read in result files
eigenValues <- read_delim("pca_result.eigenval", delim = " ", col_names = F)
eigenVectors <- read_delim("pca_result.eigenvec", delim = " ", col_names = F)

## Proportion of variation captured by each vector
eigen_percent <- round((eigenValues / (sum(eigenValues))*100), 2)


# PCA plot
png("pca-plink.eng.png", width=5, height=5, units="in", res=300)
ggplot(data = eigenVectors) +
  geom_point(mapping = aes(x = X3, y = X4), color = "red", shape = 19, size = 1, alpha = 1) +
  geom_hline(yintercept = 0, linetype="dotted") +
  geom_vline(xintercept = 0, linetype="dotted") +
  labs(x = paste0("Principal component 1 (", eigen_percent[1,1], " %)"),
       y = paste0("Principal component 2 (", eigen_percent[2,1], " %)")) +
  theme_minimal()
dev.off()


# PCA plot with animal ids
png("pca-plink.eng.animals_id.png", width=50, height=50, units="in", res=300)
ggplot(data = eigenVectors) +
  geom_point(mapping = aes(x = X3, y = X4), color = "red", shape = 19, size = 5, alpha = 1) +
  geom_text(mapping = aes(x = X3, y = X4, label = X2), size = 2, hjust = 0, vjust = 0) +  # Add labels for animal IDs
  geom_hline(yintercept = 0, linetype="dotted") +
  geom_vline(xintercept = 0, linetype="dotted") +
  labs(x = paste0("Principal component 1 (", eigen_percent[1,1], " %)"),
       y = paste0("Principal component 2 (", eigen_percent[2,1], " %)")) +
  theme_minimal()
dev.off()

My Image

8 GWAS on GCTA

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/GWAS/GWAS_result
PHENO=/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt

/home/local/bin/gcta \
    --bfile ${DATA} \
    --mlma-loco \
    --pheno ${PHENO} \
    --autosome-num 29 \
    --autosome \
    --out ${RESULT}

This is te output from the code above:

My Image

9 5%

9.1 Manhattan Plot - Bonferroni

gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_result.loco.mlma", 
                  head=T, stringsAsFactors = F)
gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}
bonf<- -log10(0.05/nrow(gwas))

library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20, 
               cex=1, lwd=1, ylab="", xlab="Chromossomes", 
               main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf), col="red2")
legend("topleft", col="red2", lwd=2, c("Bonferroni correction"), bty="n")

The code above creates a Manhattam Plot, the correction for multiple test is the Bonferroni correction

My Image

The code below will save information of Significant SNP.

library(tidyverse)

SNP_sig_bonf <- filter(gwas, logP > bonf)
write.table(SNP_sig_bonf, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

Below we can see the 1 significant SNP for Bonferroni correction.

My Image

9.2 FDR

gwas$bh <- p.adjust(gwas$p, method = "BH") #FDR 

The code below will create a file with the significant snp for FDR-BH

SNP_sig_BH <- filter(gwas, bh < 0.05)
write.table(SNP_sig_BH, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_BH.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

As outcome we can see that the FDR-BH method didn’t increase the number of significant SNP.

My Image

9.3 Corrected Bonferroni for genome independent segments

Now we are going to apply a correction for multiple testing modifying the Bonferroni test adjusting not for the total number of SNPs but for the number of the independent segments in the genome.

Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")

library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
  filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
  summarise(total_length = sum(`Seq length`)) %>%
  pull(total_length)

# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8

# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)

NeL <- Ne*L_M

# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)


gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_result.loco.mlma", 
                  head=T, stringsAsFactors = F)

gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}

bonf<- -log10(0.05/Me)

library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20, 
               cex=1, lwd=1, ylab="", xlab="Chromossomes", 
               main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf), col="red2")
legend("topleft", col="red2", lwd=2, c("Bonferroni corr. for ind. segments"), bty="n")


library(tidyverse)

SNP_sig_bonf_ind_seg <- filter(gwas, logP > bonf)
write.table(SNP_sig_bonf_ind_seg, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

My Image

Below we can find the list of significant SNPs after the correction for Multiple Testing

My Image

10 qqPlot

I performed the qqPlot analysis using the code below:

gwas<- read.table("GWAS_result.loco.mlma", h=T)
ps<- gwas$p
inflation <- function(ps) {
  chisq <- qchisq(1 - ps, 1)
  lambda <- median(chisq) / qchisq(0.5, 1)
  lambda
}
# Calculating the lambda -  the lambda statistic should be close to 1 if
#the points fall within the expected range, or greater than one if 
# the observed p-values are more significant than expected.
inflation(ps)

bonf<- -log10(0.05/nrow(gwas))

gwas$log<- -log10(gwas$p)

gg_qqplot <- function(ps, ci = 0.95) {
  n  <- length(ps)
  df <- data.frame(
    observed = -log10(sort(ps)),
    expected = -log10(ppoints(n)),
    clower   = -log10(qbeta(p = (1 - ci) / 2, shape1 = 1:n, shape2 = n:1)),
    cupper   = -log10(qbeta(p = (1 + ci) / 2, shape1 = 1:n, shape2 = n:1))
  )
  log10Pe <- expression(paste("Expected -log"[10], plain(P)))
  log10Po <- expression(paste("Observed -log"[10], plain(P)))
  ggplot(df) +
    geom_ribbon(
      mapping = aes(x = expected, ymin = clower, ymax = cupper),
      alpha = 0.1
    ) +
    geom_point(aes(expected, observed), shape = 1, size = 3) +
    geom_abline(intercept = 0, slope = 1, alpha = 0.5) +
    # geom_line(aes(expected, cupper), linetype = 2, size = 0.5) +
    # geom_line(aes(expected, clower), linetype = 2, size = 0.5) +
    xlab(log10Pe) +
    ylab(log10Po)
}

## plot -----
gg_qqplot(ps) +
  theme_bw(base_size = 24) +
  annotate(
    geom = "text",
    x = -Inf,
    y = Inf,
    hjust = -0.15,
    vjust = 1 + 0.15 * 3,
    label = sprintf("λ = %.2f", inflation(ps)),
    size = 8
  ) +
  theme(
    axis.ticks = element_line(size = 0.5),
    panel.grid = element_blank()
    # panel.grid = element_line(size = 0.5, color = "grey80")
  )

The outcome is the qqplot below, and the lambda value is \(\lambda\) = 1.03

My Image

11 VEP - Variant Effect Prediction

After insert all rsID on VEP the summary is shown below:

My Image

Bellow I’m gonna show the “genome view” for each SNP (variant) recovered from VEP:

rs42217767 ps. this variant wasn’t recovered automaticaly by VEP, so I typed the chr and position on the search area of the “genome viewer” My Image

rs110031217 My Image

rs110991998 My Image

rs42089058 My Image

rs41644634 My Image

rs41567074 My Image

ps. the unique reference genome available in the VEP is the ARS-UCD1.3, which is not a problem once it is working with the rsID, which for sure is our variant.

12 GALLO

# GALLO

#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")

#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")

#import MARKER files = the GWAS output
gwas <- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg.txt", 
                   head=T, stringsAsFactors = F)

# Assuming "gwas" is your dataframe
gwas <- subset(gwas, select = c(Chr, SNP, bp))


colnames(gwas) <- c("CHR","SNP", "BP")


#FINDING GENES AND QTLs ARROUND THE MARKER

#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2, 
                                            marker_file= gwas, 
                                            method = "gene",
                                            marker = "snp", 
                                            interval = 50000, 
                                            nThreads = NULL)

write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/out_genes_50k.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/out_genes_50k.csv")

#FINDING QTLs

out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2, 
                                          marker_file= gwas, 
                                          method = "qtl",
                                          marker = "snp", 
                                          interval = 50000, 
                                          nThreads = NULL)


write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/out_qtl_50k.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/out_qtl_50k_clean.csv")

Dowloading the .gtf file from Ensembl https://useast.ensembl.org/info/data/ftp/index.html

Downloading the .gff file from AnimalQTLdb https://www.animalgenome.org/cgi-bin/QTLdb/index

The GALLO output are bellow:

For GENES

X CHR SNP BP chr start_pos end_pos width strand gene_id gene_name gene_biotype
1 11 BTB-01060598 32301594 11 32278324 32766620 488297 - ENSBTAG00000046199 NRXN1 protein_coding
2 11 BTB-01060598 32301594 11 32280416 32280512 97 - ENSBTAG00000044573 U6 snRNA
3 22 ARS-BFGL-NGS-26419 46052082 22 45924535 46818511 893977 - ENSBTAG00000013117 CACNA2D3 protein_coding
4 22 ARS-BFGL-NGS-26419 46052082 22 46051998 46062657 10660 + ENSBTAG00000013124 LRTM1 protein_coding
5 22 ARS-BFGL-NGS-23411 5134078 22 5091037 5184321 93285 + ENSBTAG00000019832 TGFBR2 protein_coding
6 26 BTB-00926636 17857480 26 17703169 17846407 143239 - ENSBTAG00000011743 TLL2 protein_coding
7 26 BTB-00926636 17857480 26 17851539 17912665 61127 - ENSBTAG00000016298 TM9SF3 protein_coding
8 26 BTA-62125-no-rs 17899619 26 17851539 17912665 61127 - ENSBTAG00000016298 TM9SF3 protein_coding
9 26 BTA-62125-no-rs 17899619 26 17925643 18029878 104236 - ENSBTAG00000019872 PIK3AP1 protein_coding
10 28 BTA-99379-no-rs 41666186 28 41609015 41649016 40002 - ENSBTAG00000007540 GLUD1 protein_coding
11 28 BTA-99379-no-rs 41666186 28 41650333 41763371 113039 + ENSBTAG00000019194 SHLD2 protein_coding

FOR QTLs

X CHR SNP BP QTL_type start_pos end_pos QTL_ID
1 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5109498 5109502 151609
2 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5134076 5134080 151620
3 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5173197 5173201 151601
4 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51802
5 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51803
6 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51804
7 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51805
8 22 ARS-BFGL-NGS-26419 46052082 Production 46052080 46052084 51806
9 22 ARS-BFGL-NGS-26419 46052082 Production 46052080 46052084 51807
10 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51808
11 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51809
12 22 ARS-BFGL-NGS-26419 46052082 Health 46052080 46052084 51810
13 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51811
14 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51812
15 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51813
16 22 ARS-BFGL-NGS-26419 46052082 Milk 46073517 46073521 243365
17 26 BTB-00926636 17857480 Milk 17809768 17809772 195264
18 26 BTB-00926636 17857480 Milk 17809768 17809772 200269
19 26 BTB-00926636 17857480 Milk 17809768 17809772 200270
20 26 BTB-00926636 17857480 Milk 17809768 17809772 205295
21 26 BTB-00926636 17857480 Milk 17810363 17810367 198859
22 26 BTB-00926636 17857480 Milk 17810363 17810367 203868
23 26 BTB-00926636 17857480 Milk 17810363 17810367 203869
24 26 BTB-00926636 17857480 Milk 17810363 17810367 210297
25 26 BTB-00926636 17857480 Milk 17811021 17811025 198858
26 26 BTB-00926636 17857480 Milk 17811021 17811025 203866
27 26 BTB-00926636 17857480 Milk 17811021 17811025 203867
28 26 BTB-00926636 17857480 Milk 17811021 17811025 210296
29 26 BTB-00926636 17857480 Milk 17811725 17811729 197493
30 26 BTB-00926636 17857480 Milk 17811725 17811729 202704
31 26 BTB-00926636 17857480 Milk 17812273 17812277 195685
32 26 BTB-00926636 17857480 Milk 17812273 17812277 200785
33 26 BTB-00926636 17857480 Milk 17813212 17813216 195285
34 26 BTB-00926636 17857480 Milk 17813212 17813216 200301
35 26 BTB-00926636 17857480 Milk 17813978 17813982 196236
36 26 BTB-00926636 17857480 Milk 17813978 17813982 201468
37 26 BTB-00926636 17857480 Milk 17815237 17815241 198002
38 26 BTB-00926636 17857480 Milk 17815237 17815241 203190
39 26 BTB-00926636 17857480 Milk 17817223 17817227 34750
40 26 BTB-00926636 17857480 Milk 17817223 17817227 195586
41 26 BTB-00926636 17857480 Milk 17821297 17821301 197235
42 26 BTB-00926636 17857480 Milk 17821297 17821301 202455
43 26 BTB-00926636 17857480 Milk 17822209 17822213 197783
44 26 BTB-00926636 17857480 Milk 17822209 17822213 202983
45 26 BTB-00926636 17857480 Milk 17823105 17823109 196204
46 26 BTB-00926636 17857480 Milk 17823105 17823109 201419
47 26 BTB-00926636 17857480 Milk 17824770 17824774 196867
48 26 BTB-00926636 17857480 Milk 17824770 17824774 202101
49 26 BTB-00926636 17857480 Milk 17833551 17833555 195748
50 26 BTB-00926636 17857480 Milk 17833551 17833555 200855
51 26 BTB-00926636 17857480 Milk 17833551 17833555 206213
52 26 BTB-00926636 17857480 Milk 17865069 17865073 197144
53 26 BTA-62125-no-rs 17899619 Milk 17865069 17865073 197144
54 26 BTB-00926636 17857480 Milk 17865069 17865073 202367
55 26 BTA-62125-no-rs 17899619 Milk 17865069 17865073 202367
56 26 BTB-00926636 17857480 Milk 17865069 17865073 208485
57 26 BTA-62125-no-rs 17899619 Milk 17865069 17865073 208485
58 26 BTB-00926636 17857480 Milk 17871201 17871205 197611
59 26 BTA-62125-no-rs 17899619 Milk 17871201 17871205 197611
60 26 BTB-00926636 17857480 Milk 17871201 17871205 202816
61 26 BTA-62125-no-rs 17899619 Milk 17871201 17871205 202816
62 26 BTB-00926636 17857480 Milk 17871201 17871205 209076
63 26 BTA-62125-no-rs 17899619 Milk 17871201 17871205 209076
64 26 BTB-00926636 17857480 Milk 17885871 17885875 63656
65 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 63656
66 26 BTB-00926636 17857480 Milk 17885871 17885875 199068
67 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 199068
68 26 BTB-00926636 17857480 Milk 17885871 17885875 204113
69 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 204113
70 26 BTB-00926636 17857480 Milk 17885871 17885875 204114
71 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 204114
72 26 BTB-00926636 17857480 Milk 17885871 17885875 210399
73 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 210399
74 26 BTB-00926636 17857480 Milk 17895273 17895277 63657
75 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 63657
76 26 BTB-00926636 17857480 Milk 17895273 17895277 199069
77 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 199069
78 26 BTB-00926636 17857480 Milk 17895273 17895277 204115
79 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 204115
80 26 BTB-00926636 17857480 Milk 17895273 17895277 204116
81 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 204116
82 26 BTB-00926636 17857480 Milk 17895273 17895277 210400
83 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 210400
84 26 BTB-00926636 17857480 Milk 17895803 17895807 199070
85 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 199070
86 26 BTB-00926636 17857480 Milk 17895803 17895807 204117
87 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 204117
88 26 BTB-00926636 17857480 Milk 17895803 17895807 204118
89 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 204118
90 26 BTB-00926636 17857480 Milk 17895803 17895807 210401
91 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 210401
92 26 BTB-00926636 17857480 Milk 17900304 17900308 199071
93 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 199071
94 26 BTB-00926636 17857480 Milk 17900304 17900308 204119
95 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 204119
96 26 BTB-00926636 17857480 Milk 17900304 17900308 204120
97 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 204120
98 26 BTB-00926636 17857480 Milk 17900304 17900308 210402
99 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 210402
100 26 BTB-00926636 17857480 Milk 17902932 17902936 63658
101 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 63658
102 26 BTB-00926636 17857480 Milk 17902932 17902936 199072
103 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 199072
104 26 BTB-00926636 17857480 Milk 17902932 17902936 204121
105 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 204121
106 26 BTB-00926636 17857480 Milk 17902932 17902936 204122
107 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 204122
108 26 BTB-00926636 17857480 Milk 17902932 17902936 210403
109 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 210403
110 26 BTB-00926636 17857480 Milk 17906861 17906865 63660
111 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 63660
112 26 BTB-00926636 17857480 Milk 17906861 17906865 199074
113 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 199074
114 26 BTB-00926636 17857480 Milk 17906861 17906865 204125
115 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 204125
116 26 BTB-00926636 17857480 Milk 17906861 17906865 204126
117 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 204126
118 26 BTB-00926636 17857480 Milk 17906861 17906865 210405
119 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 210405
120 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 63654
121 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 199076
122 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 204129
123 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 204130
124 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 210407
125 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 63655
126 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 197826
127 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 203026
128 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 203027
129 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 209354
130 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 195860
131 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 201008
132 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 201009
133 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 206462
134 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 33984
135 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 195668
136 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 195669
137 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 195670
138 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 200758
139 26 BTA-62125-no-rs 17899619 Milk 17931209 17931213 199092
140 26 BTA-62125-no-rs 17899619 Milk 17931209 17931213 204151
141 28 BTA-99379-no-rs 41666186 Reproduction 41646434 41646438 107048
142 28 BTA-99379-no-rs 41666186 Reproduction 41646434 41646438 107227

12.1 QTL annotation on GALLO

QTL type

#GALLO
par(mar=c(8,20,8,8))
plot_qtl_info(out.qtl, qtl_plot = "qtl_type", cex=1)

My Image

QTL type

#GALLO
par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Reproduction")

par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Health")

Reproduction My Image

Health My Image

12.2 QTL enrichment on GALLO

#GALLO
#QTL enrichment analysis 
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "Name",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)


# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]

write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/out_enrich_qtl_genome_name.csv")

out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "QTL_type",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)

sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/out_enrich_qtl_genome_type.csv")

#Plots

#Name

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")


#Type

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")

QTL Enrichment outcomes

Enrichment by name (enrichment analysis will be performed for each trait individually)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval QTL_type
5 Milk C14 index 35 4437 108 163224 0.0000000 0.0000000 Milk
9 Milk myristoleic acid content 26 3047 108 163224 0.0000000 0.0000000 Milk
10 Milk palmitoleic acid content 15 2422 108 163224 0.0000000 0.0000000 Milk
6 Milk C16 index 12 2002 108 163224 0.0000000 0.0000001 Milk
12 Muscle carnosine content 3 67 108 163224 0.0000131 0.0000497 Meat and Carcass
14 Pregnancy rate 2 944 108 163224 0.1296627 0.4105985 Reproduction
18 Teat length 1 300 108 163224 0.1802436 0.4892327 Exterior
15 Rear leg placement - side view 1 430 108 163224 0.2479752 0.5235032 Exterior
17 Stillbirth 2 1363 108 163224 0.2280294 0.5235032 Reproduction
3 Foot angle 1 672 108 163224 0.3596272 0.6162877 Exterior
7 Milk capric acid content 1 912 108 163224 0.4541067 0.6162877 Milk
8 Milk myristic acid content 1 902 108 163224 0.4504612 0.6162877 Milk
13 Net merit 1 903 108 163224 0.4508269 0.6162877 Production
19 Udder depth 1 695 108 163224 0.3693423 0.6162877 Exterior
16 Somatic cell score 1 1122 108 163224 0.5253603 0.6654564 Health
2 Conception rate 1 1255 108 163224 0.5656375 0.6716945 Reproduction
1 Calving ease 2 3819 108 163224 0.7219243 0.7776744 Reproduction
4 Length of productive life 1 2004 108 163224 0.7367441 0.7776744 Production
11 Milk yield 1 6432 108 163224 0.9870080 0.9870080 Milk

My Image

Enrichment by QTL_type (enrichment processes performed for the QTL classes)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval
1 Exterior 4 9077 108 163224 0.8569775 0.9999953
2 Health 1 5889 108 163224 0.9811250 0.9999953
3 Meat and Carcass 3 18258 108 163224 0.9997109 0.9999953
4 Milk 91 75352 108 163224 0.0000000 0.0000000
5 Production 2 19640 108 163224 0.9999848 0.9999953
6 Reproduction 7 35008 108 163224 0.9999953 0.9999953

My Image

13 GPROFILER

online version: https://biit.cs.ut.ee/gprofiler/gost

I got a script from Julia Rodrigues about the R package GPROFILER to perform an enrichment of my Genes.

### enriquecimento genico
#install.packages("gprofiler2")
library(gprofiler2)

#Para conferir a lista de organism -> https://biit.cs.ut.ee/gprofiler/page/organism-list

#Obs: eu entro com os ids ENSOAR...
query <- read.table ("/home/bambrozi/2_CORTISOL/GALLO/out_genes_50k.txt", header = T)
query <- query[,c("gene_id")]

gene_enrich <- gost(
  query,
  organism = "btaurus", 
  ordered_query = FALSE,
  multi_query = FALSE,
  significant = TRUE,
  exclude_iea = FALSE,
  measure_underrepresentation = FALSE,
  evcodes = TRUE,
  user_threshold = 0.05,
  correction_method = c("fdr"),
  domain_scope = c("annotated"),
  numeric_ns = "",
  sources = NULL,
  as_short_link = FALSE,
  highlight = FALSE
)


#str(gene_enrich) # para ver o formato dos meus dados

#selecionando apenas as informações da lista que me interessam para fazer meu data.frame 
result_enrich <- data.frame(gene_enrich$result)
result_enrich <- data.frame(Category = result_enrich$source,
                            ID = result_enrich$term_id,
                            Term = result_enrich$term_name,
                            adj_pvalue = result_enrich$p_value,
                            id_ensembl = result_enrich$intersection)

write.table(result_enrich,"/home/bambrozi/2_CORTISOL/GPROFILER/gene_enrich.txt", col.names=TRUE, row.names=FALSE, sep="\t", quote=F)
write.csv(result_enrich,"/home/bambrozi/2_CORTISOL/GPROFILER/gene_enrich.csv")

Below we can se the significant terms of this enrichment (output):

X Category ID Term adj_pvalue id_ensembl
1 GO:BP GO:0006538 glutamate catabolic process 0.0383792 ENSBTAG00000007540
2 GO:BP GO:0003274 endocardial cushion fusion 0.0383792 ENSBTAG00000019832
3 GO:BP GO:1905005 regulation of epithelial to mesenchymal transition involved in endocardial cushion formation 0.0383792 ENSBTAG00000019832
4 GO:BP GO:1905007 positive regulation of epithelial to mesenchymal transition involved in endocardial cushion formation 0.0383792 ENSBTAG00000019832
5 GO:BP GO:0003430 growth plate cartilage chondrocyte growth 0.0383792 ENSBTAG00000019832
6 GO:BP GO:0003431 growth plate cartilage chondrocyte development 0.0383792 ENSBTAG00000019832
7 GO:BP GO:0003433 chondrocyte development involved in endochondral bone morphogenesis 0.0383792 ENSBTAG00000019832
8 GO:BP GO:0003415 chondrocyte hypertrophy 0.0383792 ENSBTAG00000019832
9 GO:BP GO:0048631 regulation of skeletal muscle tissue growth 0.0383792 ENSBTAG00000011743
10 GO:BP GO:0051138 positive regulation of NK T cell differentiation 0.0383792 ENSBTAG00000019832
11 GO:BP GO:0062043 positive regulation of cardiac epithelial to mesenchymal transition 0.0383792 ENSBTAG00000019832
12 GO:BP GO:0051136 regulation of NK T cell differentiation 0.0383792 ENSBTAG00000019832
13 GO:BP GO:0003186 tricuspid valve morphogenesis 0.0383792 ENSBTAG00000019832
14 GO:BP GO:0062042 regulation of cardiac epithelial to mesenchymal transition 0.0383792 ENSBTAG00000019832
15 GO:BP GO:0060434 bronchus morphogenesis 0.0383792 ENSBTAG00000019832
16 GO:BP GO:0060440 trachea formation 0.0383792 ENSBTAG00000019832
17 GO:BP GO:0060462 lung lobe development 0.0383792 ENSBTAG00000019832
18 GO:BP GO:0060463 lung lobe morphogenesis 0.0383792 ENSBTAG00000019832
19 GO:BP GO:0048632 negative regulation of skeletal muscle tissue growth 0.0383792 ENSBTAG00000011743
20 GO:BP GO:0003175 tricuspid valve development 0.0383792 ENSBTAG00000019832
21 GO:BP GO:0061520 Langerhans cell differentiation 0.0383792 ENSBTAG00000019832
22 GO:BP GO:1905317 inferior endocardial cushion morphogenesis 0.0383792 ENSBTAG00000019832
23 GO:BP GO:2000563 positive regulation of CD4-positive, alpha-beta T cell proliferation 0.0383792 ENSBTAG00000019832
24 GO:BP GO:1990428 miRNA transport 0.0383792 ENSBTAG00000019832
25 GO:BP GO:0002513 tolerance induction to self antigen 0.0383792 ENSBTAG00000019832
26 GO:BP GO:0002514 B cell tolerance induction 0.0383792 ENSBTAG00000019832
27 GO:BP GO:0003149 membranous septum morphogenesis 0.0383792 ENSBTAG00000019832
28 GO:BP GO:1990086 lens fiber cell apoptotic process 0.0383792 ENSBTAG00000019832
29 GO:BP GO:0002520 immune system development 0.0383792 ENSBTAG00000019832,ENSBTAG00000019194
30 GO:BP GO:0002666 positive regulation of T cell tolerance induction 0.0383792 ENSBTAG00000019832
31 GO:BP GO:0002651 positive regulation of tolerance induction to self antigen 0.0383792 ENSBTAG00000019832
32 GO:BP GO:0061343 cell adhesion involved in heart morphogenesis 0.0383792 ENSBTAG00000019832
33 GO:BP GO:0002649 regulation of tolerance induction to self antigen 0.0383792 ENSBTAG00000019832
34 GO:BP GO:0002661 regulation of B cell tolerance induction 0.0383792 ENSBTAG00000019832
35 GO:BP GO:0002664 regulation of T cell tolerance induction 0.0383792 ENSBTAG00000019832
36 GO:BP GO:0002663 positive regulation of B cell tolerance induction 0.0383792 ENSBTAG00000019832
37 GO:BP GO:0002684 positive regulation of immune system process 0.0428818 ENSBTAG00000019832,ENSBTAG00000019872,ENSBTAG00000019194
38 GO:BP GO:0048630 skeletal muscle tissue growth 0.0431888 ENSBTAG00000011743
39 GO:BP GO:0051251 positive regulation of lymphocyte activation 0.0431888 ENSBTAG00000019832,ENSBTAG00000019194
40 GO:BP GO:0034154 toll-like receptor 7 signaling pathway 0.0431888 ENSBTAG00000019872
41 GO:BP GO:0060439 trachea morphogenesis 0.0431888 ENSBTAG00000019832
42 GO:BP GO:0002517 T cell tolerance induction 0.0431888 ENSBTAG00000019832
43 GO:BP GO:0002645 positive regulation of tolerance induction 0.0431888 ENSBTAG00000019832
44 GO:BP GO:0060433 bronchus development 0.0460331 ENSBTAG00000019832
45 GO:BP GO:0003418 growth plate cartilage chondrocyte differentiation 0.0460331 ENSBTAG00000019832
46 GO:BP GO:0002696 positive regulation of leukocyte activation 0.0482941 ENSBTAG00000019832,ENSBTAG00000019194
47 GO:BP GO:0001865 NK T cell differentiation 0.0489635 ENSBTAG00000019832
48 GO:BP GO:0050867 positive regulation of cell activation 0.0496277 ENSBTAG00000019832,ENSBTAG00000019194
49 GO:BP GO:0003198 epithelial to mesenchymal transition involved in endocardial cushion formation 0.0496277 ENSBTAG00000019832
50 GO:BP GO:0002643 regulation of tolerance induction 0.0496277 ENSBTAG00000019832
51 GO:BP GO:2001034 positive regulation of double-strand break repair via nonhomologous end joining 0.0496277 ENSBTAG00000019194
52 GO:MF GO:0004352 glutamate dehydrogenase (NAD+) activity 0.0098128 ENSBTAG00000007540
53 GO:MF GO:0004353 glutamate dehydrogenase [NAD(P)+] activity 0.0098128 ENSBTAG00000007540
54 GO:MF GO:0016639 oxidoreductase activity, acting on the CH-NH2 group of donors, NAD or NADP as acceptor 0.0098128 ENSBTAG00000007540
55 GO:MF GO:0005026 transforming growth factor beta receptor activity, type II 0.0147171 ENSBTAG00000019832
56 GO:MF GO:0048185 activin binding 0.0326677 ENSBTAG00000019832
57 GO:MF GO:0036312 phosphatidylinositol 3-kinase regulatory subunit binding 0.0326677 ENSBTAG00000019872
58 GO:MF GO:0034713 type I transforming growth factor beta receptor binding 0.0326677 ENSBTAG00000019832
59 GO:MF GO:0017002 activin receptor activity 0.0326677 ENSBTAG00000019832
60 GO:MF GO:0005024 transforming growth factor beta receptor activity 0.0326677 ENSBTAG00000019832
61 GO:MF GO:0050431 transforming growth factor beta binding 0.0461234 ENSBTAG00000019832
62 GO:MF GO:0043548 phosphatidylinositol 3-kinase binding 0.0461234 ENSBTAG00000019872
63 GO:MF GO:0016638 oxidoreductase activity, acting on the CH-NH2 group of donors 0.0461234 ENSBTAG00000007540
64 GO:MF GO:0005160 transforming growth factor beta receptor binding 0.0461234 ENSBTAG00000019832
65 GO:MF GO:0004675 transmembrane receptor protein serine/threonine kinase activity 0.0461234 ENSBTAG00000019832
66 REAC REAC:R-BTA-2243919 Crosslinking of collagen fibrils 0.0440084 ENSBTAG00000011743
67 REAC REAC:R-BTA-2173788 Downregulation of TGF-beta receptor signaling 0.0440084 ENSBTAG00000019832
68 REAC REAC:R-BTA-8964539 Glutamate and glutamine metabolism 0.0440084 ENSBTAG00000007540
69 REAC REAC:R-BTA-112308 Presynaptic depolarization and calcium channel opening 0.0440084 ENSBTAG00000013117
70 REAC REAC:R-BTA-2151201 Transcriptional activation of mitochondrial biogenesis 0.0440084 ENSBTAG00000007540
71 REAC REAC:R-BTA-2173791 TGF-beta receptor signaling in EMT (epithelial to mesenchymal transition) 0.0440084 ENSBTAG00000019832

13.1 GPROFILER ON-LINE

From the online version of GPROFILER i got the following results.

Legend My Image

My Image

My Image

Additionaly I perfomed a another version of this analysis but with no eletronic GO annotation

My Image

My Image

13.2 Terms distribution Pie Chart

Now I’m gonna make a pie chart with the categories

library(ggplot2)

# Assuming sig_enrich$Category contains the categories
category_counts <- table(result_enrich$Category)

# Create a pie chart
pie_chart <- ggplot(data = NULL, aes(x = factor(1), fill = names(category_counts), y = as.numeric(category_counts))) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar(theta = "y") +
  labs(title = "Distribution among terms",
       x = NULL,
       y = NULL,
       fill = "Category") +
  theme_void() +
  theme(legend.position = "right")

# Print the pie chart
print(pie_chart)

My Image

14 Haplotype Blocks

First I need to generate the .ped file

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/inter_file

plink \
    --bfile ${DATA} \
    --cow \
    --recode \
    --out ${RESULT}

Second, is necessary to convert the .ped file to Linkage format:

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/inter_file
RESULT=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/hapblock_in

plink \
    --file ${DATA} \
    --cow \
    --recode HV \
    --out ${RESULT}

The code above will generate a pair of files .ped and .info for each chromosome

I ran the haploview only for chromosome 11, 22, 26, 28 that are when the significant SNP are located.

As the LD Plot bring the SNP ordered, first I looked at /home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg.txt, to know the SNP position, and then compare with the Check Markers tab in the haploview SNP position x SNP# in plot

My Image

Chr SNP bp A1 A2 Freq Haploview.order
11 BTB-01060598 32301594 C A 0.4503970 680
22 ARS-BFGL-NGS-23411 5134078 A C 0.1865080 88
22 ARS-BFGL-NGS-26419 46052082 A C 0.0813492 762
26 BTB-00926636 17857480 C A 0.0257937 296
26 BTA-62125-no-rs 17899619 C A 0.0257937 297
28 BTA-99379-no-rs 41666186 A C 0.0496032 701

The two significant SNPs from chromosome 26 fell within a Haploblock.

Chr 11 (SNP 680) My Image

Chr 22 (SNP 88) My Image

Chr 22 (SNP 762) My Image

Chr 26 (SNP 296 and 297) My Image

Chr 28 (SNP 701) My Image

14.1 BLOCK 10 in Chr 26

As we found an haploblock on chromosome 26 I decided take a look inside this block. So, on Haploview I checked the name and position of the first SNP of this block (292) and the last (300).

X. Name Position ObsHET PredHET HWpval X.Geno FamTrio MendErr MAF Alleles
292 ARS-BFGL-NGS-111577 17714604 0.401 0.448 0.1161 100 0 0 0.339 A:C
293 ARS-BFGL-NGS-21408 17746468 0.369 0.403 0.2260 100 0 0 0.280 C:A
294 ARS-BFGL-NGS-16328 17778908 0.056 0.054 1.0000 100 0 0 0.028 A:C
295 ARS-BFGL-NGS-117063 17817225 0.413 0.452 0.2022 100 0 0 0.345 C:A
296 BTB-00926636 17857480 0.052 0.050 1.0000 100 0 0 0.026 A:C
297 BTA-62125-no-rs 17899619 0.052 0.050 1.0000 100 0 0 0.026 A:C
298 ARS-BFGL-NGS-110679 17953795 0.397 0.433 0.2211 100 0 0 0.317 A:C
299 ARS-BFGL-NGS-72889 17989430 0.052 0.050 1.0000 100 0 0 0.026 C:A
300 BTB-00926868 18040673 0.385 0.420 0.2319 100 0 0 0.300 A:C

Then I checked on NCBI’s Genome Data Viewer which genes are inside this block, adding 50k before the first SNP and 50K after the last SNP.

This interval has 426,069bp and has this genes inside:

  • DNTT
  • OPALIN
  • TLL2
  • TM9SF3
  • PIK3AP1

As you can see in the image below:

My Image

The genes TLL2, TM9SF3 and PIK3AP1 already were in the gene list from GALLO, but the genes DNTT and OPALIN were new!

I performed a GPROFILER analysis only for these genes spotted in this Block 10.

My Image

15 5 - 15%

After check the outcome for 5% of significance for SNPs, Prof. Flávio asked me to carry out again considering the interval between 5% and 15% of significance.

15.1 Corrected Bonferroni for genome independent segments

Now we are going to apply a correction for multiple testing modifying the Bonferroni test adjusting not for the total number of SNPs but for the number of the independent segments in the genome.

Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")

library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
  filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
  summarise(total_length = sum(`Seq length`)) %>%
  pull(total_length)

# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8

# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)

NeL <- Ne*L_M

# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)


gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_result.loco.mlma", 
                  head=T, stringsAsFactors = F)

gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}

bonf05<- -log10(0.05/Me)
bonf15<- -log10(0.15/Me)

library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20, 
               cex=1, lwd=1, ylab="", xlab="Chromossomes", 
               main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf05), col="black")
abline(h=(bonf15), col="red2")
legend("topleft", inset=c(0.05, 0.05), col="red2", lwd=2, 
       legend=c("15% sig. Bonferroni corr. for ind. segments"), bty="n")
legend("topleft", inset=c(0.05, 0.10), col="black", lwd=2, 
       legend=c("5% sig. Bonferroni corr. for ind. segments"), bty="n")

library(tidyverse)

#Recovering the significant SNPs > <5%
SNP_sig_bonf_ind_seg_5 <- filter(gwas, logP > bonf05)
write.csv(SNP_sig_bonf_ind_seg_5, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg_5.csv")


#Recovering the significant SNPs > 15% and <5%
SNP_sig_bonf_ind_seg_5_15 <- filter(gwas, logP > bonf15 & logP < bonf05)
write.csv(SNP_sig_bonf_ind_seg_5_15, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg_5_15.csv")


#Recovering the significant SNPs > 15% 
SNP_sig_bonf_ind_seg_15 <- filter(gwas, logP > bonf15)
write.csv(SNP_sig_bonf_ind_seg_15, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg_15.csv")

My Image

Below we can find the list of significant SNPs after the correction for Multiple Testing

Chr X X15. X5. X5.15. logP
28 BTA-99379-no-rs X X 6.610970
22 ARS-BFGL-NGS-23411 X X 5.485541
22 ARS-BFGL-NGS-26419 X X 5.272074
26 BTA-62125-no-rs X X 4.732155
26 BTB-00926636 X X 4.732155
11 BTB-01060598 X X 4.682532
20 BTB-01471673 X X 4.182974
23 Hapmap54016-rs29022819 X X 4.016453
28 ARS-BFGL-NGS-11935 X X 3.986177
28 Hapmap43005-BTA-64606 X X 3.986177
11 BTA-122098-no-rs X X 3.980028
19 ARS-BFGL-NGS-29119 X X 3.925461
14 Hapmap39823-BTA-35254 X X 3.905277
19 ARS-BFGL-NGS-28901 X X 3.888442
3 ARS-BFGL-NGS-4585 X X 3.885943
14 UA-IFASA-7664 X X 3.882503

To make easier to check the name of the significant SNPs I made the following Manhattan Plot

# Prepare the dataset
don <- gwas %>% 
  group_by(Chr) %>% 
  summarise(chr_len = max(bp)) %>% 
  mutate(tot = cumsum(as.numeric(chr_len)) - as.numeric(chr_len)) %>%
  select(-chr_len) %>%
  left_join(gwas, ., by = c("Chr" = "Chr")) %>%
  arrange(Chr, bp) %>%
  mutate(BPcum = as.numeric(bp) + tot) %>%
  mutate(is_highlight = ifelse(logP > bonf15, "yes", "no")) %>%
  mutate(is_annotate = ifelse(logP > bonf15, "yes", "no"))

# Remove rows with missing BPcum values
don <- don %>% filter(!is.na(BPcum))

# Prepare X axis
axisdf <- don %>% 
  group_by(Chr) %>% 
  summarize(center = (max(BPcum) + min(BPcum)) / 2)

# Find the maximum logP value for setting y-axis limits
max_logP <- max(don$logP, na.rm = TRUE)

# Make the plot
ggplot(don, aes(x = BPcum, y = logP)) +
  geom_point(aes(color = as.factor(Chr)), alpha = 0.8, size = 1.3) +
  scale_color_manual(values = rep(c("grey", "skyblue"), 22)) +
  scale_x_continuous(label = axisdf$Chr, breaks = axisdf$center) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, max_logP + 1)) + # Set y-axis limit
  geom_hline(yintercept = bonf05, color = "black") +  # Add horizontal line for bonf05
  geom_hline(yintercept = bonf15, color = "red2") +   # Add horizontal line for bonf15
  geom_point(data = subset(don, is_highlight == "yes"), color = "orange", size = 2) +
  geom_text_repel(data = subset(don, is_annotate == "yes"), aes(label = SNP), size = 2) +
  annotate("text", x = Inf, y = Inf, label = "15% sig. Bonferroni corr. for ind. segments", 
           color = "red2", hjust = 1.1, vjust = 2, size = 3) +
  annotate("text", x = Inf, y = Inf, label = "5% sig. Bonferroni corr. for ind. segments", 
           color = "black", hjust = 1.1, vjust = 3.5, size = 3) +
  theme_bw() +
  theme(
    legend.position = "none",
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  ) +
  labs(title = "GWAS Cortisol", x = "Chromosomes", y = "-log10(p)")

My Image

Generating a file with the GWAS output and rsID

library(tidyverse)

# Extracting SNP names to use in the SNPChimp
SNP_NAMES <- SNP_sig_bonf_ind_seg_15 %>% pull(SNP)
SNP_NAMES <- paste(SNP_NAMES, collapse = ", ")
print(SNP_NAMES)

# Copy this printed names and paste on SNPChimp

# Opening the SNPChimp output in R
rsID <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/SNPchimp_result_ind_seg_15.tsv")
rsID <- rsID[,c("rs","SNP_name")]

#Merging in one file the GWAS output and rsID
SNP_rsID_sig_bonf_ind_seg_15 <- merge(SNP_sig_bonf_ind_seg_15, rsID, by.x = "SNP", by.y = "SNP_name")
SNP_rsID_sig_bonf_ind_seg_15 <- SNP_rsID_sig_bonf_ind_seg_15[, c("rs", "SNP", "Chr", "bp", "A1", "A2", "Freq", "b", "se", "p", "logP")]
write.csv(SNP_rsID_sig_bonf_ind_seg_15, "/home/bambrozi/2_CORTISOL/GWAS/SNP_rsID_bonf_ind_seg_15.csv")

Bellow we can see the table with the SNP names and rsID for all significant SNPs with p-value lower than 0,15.

X rs SNP Chr bp A1 A2 Freq b se p logP
1 rs109578343 ARS-BFGL-NGS-11935 28 44942879 A C 0.0297619 412.261 106.1750 0.0001032 3.986177
2 rs110031217 ARS-BFGL-NGS-23411 22 5134078 A C 0.1865080 207.116 44.5110 0.0000033 5.485541
3 rs110991998 ARS-BFGL-NGS-26419 22 46052082 A C 0.0813492 308.357 67.7592 0.0000053 5.272074
4 rs109751680 ARS-BFGL-NGS-28901 19 42338148 C A 0.3710320 141.156 36.8765 0.0001293 3.888442
5 rs41577586 ARS-BFGL-NGS-29119 19 49744592 A C 0.4047620 -142.084 36.9169 0.0001187 3.925461
6 rs109888380 ARS-BFGL-NGS-4585 3 107160292 A C 0.3353170 -144.488 37.7607 0.0001300 3.885943
7 rs41624254 BTA-122098-no-rs 11 31882807 A C 0.4444440 131.779 33.9688 0.0001047 3.980028
8 rs41644634 BTA-62125-no-rs 26 17899619 C A 0.0257937 487.149 113.7690 0.0000185 4.732155
9 rs41567074 BTA-99379-no-rs 28 41666186 A C 0.0496032 435.809 84.4338 0.0000002 6.610970
10 rs42089058 BTB-00926636 26 17857480 C A 0.0257937 487.149 113.7690 0.0000185 4.732155
11 rs42217767 BTB-01060598 11 32301594 C A 0.4503970 -152.746 35.8860 0.0000208 4.682532
12 rs42590312 BTB-01471673 20 11545685 A C 0.2460320 169.707 42.5157 0.0000656 4.182974
13 rs41632193 Hapmap39823-BTA-35254 14 64061208 A C 0.2916670 -138.029 35.9698 0.0001244 3.905277
14 rs41648479 Hapmap43005-BTA-64606 28 45805105 A C 0.0297619 412.261 106.1750 0.0001032 3.986177
15 rs29022819 Hapmap54016-rs29022819 23 33609998 C A 0.1904760 173.228 44.4201 0.0000963 4.016453
16 rs41632223 UA-IFASA-7664 14 64299714 C A 0.2678570 -141.443 36.9840 0.0001311 3.882503

16 qqPlot

17 GWAS on GCTA - considering BIRTH_YEAR and SAMPLING_DATE

pheno <- read.table("/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", header = T)
data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv", header = T)
ids_eq <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv", header = T)


# Create data_final$cdn_id by matching IDs with elora_id
data_final$cdn_id <- ids_eq$cdn_id[match(data_final$ID, ids_eq$elora_id)]
fixeff <- data_final[,c("ID", "BIRTH_YEAR", "Sampling_date", "cdn_id")]
fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id, ]
fixeff$FID <- "HO"
fixeff <- fixeff[, c("FID", "cdn_id", "BIRTH_YEAR", "Sampling_date")]

identical(fixeff$cdn_id, pheno$cdn_id)

write.table(fixeff, "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/fixeff.txt", quote = F, row.names = F, col.names = T)
#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/GWAS_result
PHENO=/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt
FIXEFF=/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/fixeff.txt

/home/local/bin/gcta \
    --bfile ${DATA} \
    --mlma-loco \
    --pheno ${PHENO} \
    --qcovar ${FIXEFF} \
    --autosome-num 29 \
    --autosome \
    --out ${RESULT}

17.1 Corrected Bonferroni for genome independent segments

Now we are going to apply a correction for multiple testing modifying the Bonferroni test adjusting not for the total number of SNPs but for the number of the independent segments in the genome.

Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")

library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
  filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
  summarise(total_length = sum(`Seq length`)) %>%
  pull(total_length)

# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8

# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)

NeL <- Ne*L_M

# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)


gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/GWAS_result.loco.mlma", 
                  head=T, stringsAsFactors = F)

gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}

bonf<- -log10(0.05/Me)

library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20, 
               cex=1, lwd=1, ylab="", xlab="Chromossomes", 
               main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf), col="red2")
legend("topleft", col="red2", lwd=2, c("Bonferroni corr. for ind. segments"), bty="n")


library(tidyverse)

SNP_sig_bonf_ind_seg <- filter(gwas, logP > bonf)
write.table(SNP_sig_bonf_ind_seg, file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/SNP_sig_Bonf_ind_seg.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

write.csv(SNP_sig_bonf_ind_seg, "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/SNP_sig_Bonf_ind_seg2.csv")

The SNPs below are the significant after the correction above

X Chr SNP bp A1 A2 Freq b se p logP
1 22 ARS-BFGL-NGS-23411 5134078 A C 0.1872510 196.185 43.9934 8.20e-06 5.085250
2 22 ARS-BFGL-NGS-26419 46052082 A C 0.0816733 295.245 66.9504 1.03e-05 4.985404
3 26 BTB-00926636 17857480 C A 0.0258964 491.154 112.4840 1.26e-05 4.898635
4 26 BTA-62125-no-rs 17899619 C A 0.0258964 491.154 112.4840 1.26e-05 4.898635
5 28 BTA-99379-no-rs 41666186 A C 0.0498008 394.376 83.4532 2.30e-06 5.639587

My Image

Bellow we inserted the rsID searched at SNPCHIMP

X Chr rsID SNP bp A1 A2 Freq b se p logP
1 22 rs110031217 ARS-BFGL-NGS-23411 5134078 A C 0.1872510 196.185 43.9934 8.20e-06 5.085250
2 22 rs110991998 ARS-BFGL-NGS-26419 46052082 A C 0.0816733 295.245 66.9504 1.03e-05 4.985404
3 26 rs42089058 BTB-00926636 17857480 C A 0.0258964 491.154 112.4840 1.26e-05 4.898635
4 26 rs41644634 BTA-62125-no-rs 17899619 C A 0.0258964 491.154 112.4840 1.26e-05 4.898635
5 28 rs41567074 BTA-99379-no-rs 41666186 A C 0.0498008 394.376 83.4532 2.30e-06 5.639587

17.1.1 Option 2: Manhattan Plot

gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/GWAS_result.loco.mlma", 
                  head=T, stringsAsFactors = F)
# Prepare the dataset
don <- gwas %>% 
  group_by(Chr) %>% 
  summarise(chr_len = max(bp)) %>% 
  mutate(tot = cumsum(as.numeric(chr_len)) - as.numeric(chr_len)) %>%
  select(-chr_len) %>%
  left_join(gwas, ., by = c("Chr" = "Chr")) %>%
  arrange(Chr, bp) %>%
  mutate(BPcum = as.numeric(bp) + tot) %>%
  mutate(is_highlight = ifelse(logP > bonf, "yes", "no")) %>%
  mutate(is_annotate = ifelse(logP > bonf, "yes", "no"))

# Remove rows with missing BPcum values
don <- don %>% filter(!is.na(BPcum))

# Prepare X axis
axisdf <- don %>% 
  group_by(Chr) %>% 
  summarize(center = (max(BPcum) + min(BPcum)) / 2)

# Find the maximum logP value for setting y-axis limits
max_logP <- max(don$logP, na.rm = TRUE)

# Make the plot
ggplot(don, aes(x = BPcum, y = logP)) +
  geom_point(aes(color = as.factor(Chr)), alpha = 0.8, size = 1.3) +
  scale_color_manual(values = rep(c("grey", "skyblue"), 22)) +
  scale_x_continuous(label = axisdf$Chr, breaks = axisdf$center) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, max_logP + 1)) + # Set y-axis limit
  geom_point(data = subset(don, is_highlight == "yes"), color = "orange", size = 2) +
  geom_text_repel(data = subset(don, is_annotate == "yes"), aes(label = SNP), size = 2) +
  theme_bw() +
  theme(
    legend.position = "none",
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  ) +
  labs(title = "GWAS Cortisol", x = "Chromosomes", y = "-log10(p)")

My Image

18 VEP - Variant Effect Prediction

After insert all rsID on VEP the summary is shown below:

My Image

As can be seen, the VEP only analysed 14 variants

Bellow I’m gonna show the “genome view” for each SNP (variant) recovered from VEP:

rs41648479 My Image

rs41632223 My Image

rs41632193 My Image

rs42590312 My Image

rs42089058 My Image

rs41567074 My Image

rs41644634 My Image

rs41624254 My Image

rs109888380 My Image

rs41577586 My Image

rs109751680 My Image

rs110991998 My Image

rs110031217 My Image

rs109578343 My Image

ps. the unique reference genome available in the VEP is the ARS-UCD1.3, which is not a problem once it is working with the rsID, which for sure is our variant.

19 VEP - With Birth_year and Sampling_date

After insert all rsID on VEP the summary is shown below:

My Image

As can be seen, the VEP only analysed 14 variants

Bellow I’m gonna show the “genome view” for each SNP (variant) recovered from VEP:

rs110031217 My Image

rs110991998 My Image

rs42089058 My Image

rs41644634 My Image

rs41567074 My Image

ps. the unique reference genome available in the VEP is the ARS-UCD1.3, which is not a problem once it is working with the rsID, which for sure is our variant.

20 GALLO

# GALLO

#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")

#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")

#import MARKER files = the GWAS output
gwas <- read.csv(file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg_15.csv")

# Assuming "gwas" is your dataframe
gwas <- subset(gwas, select = c(Chr, SNP, bp))


colnames(gwas) <- c("CHR","SNP", "BP")


#FINDING GENES AND QTLs ARROUND THE MARKER

#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2, 
                                            marker_file= gwas, 
                                            method = "gene",
                                            marker = "snp", 
                                            interval = 50000, 
                                            nThreads = NULL)

write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_genes_50k_15.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_genes_50k_15.csv")

#FINDING QTLs

out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2, 
                                          marker_file= gwas, 
                                          method = "qtl",
                                          marker = "snp", 
                                          interval = 50000, 
                                          nThreads = NULL)


write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_qtl_50k_15.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_qtl_50k_clean_15.csv")

Dowloading the .gtf file from Ensembl https://useast.ensembl.org/info/data/ftp/index.html

Downloading the .gff file from AnimalQTLdb https://www.animalgenome.org/cgi-bin/QTLdb/index

The GALLO output are bellow:

For GENES

X CHR SNP BP chr start_pos end_pos width strand gene_id gene_name gene_biotype
1 3 ARS-BFGL-NGS-4585 107160292 3 107109148 107110689 1542 + ENSBTAG00000030338 GJA9 protein_coding
2 3 ARS-BFGL-NGS-4585 107160292 3 107111822 107117831 6010 + ENSBTAG00000030337 MYCBP protein_coding
3 3 ARS-BFGL-NGS-4585 107160292 3 107121920 107136338 14419 + ENSBTAG00000009368 RRAGC protein_coding
4 11 BTB-01060598 32301594 11 32278324 32766620 488297 - ENSBTAG00000046199 NRXN1 protein_coding
5 11 BTB-01060598 32301594 11 32280416 32280512 97 - ENSBTAG00000044573 U6 snRNA
6 14 Hapmap39823-BTA-35254 64061208 14 64047857 64103385 55529 + ENSBTAG00000017833 RNF19A protein_coding
7 14 UA-IFASA-7664 64299714 14 64289853 64398324 108472 + ENSBTAG00000019793 RGS22 protein_coding
8 19 ARS-BFGL-NGS-29119 49744592 19 49662140 49766195 104056 + ENSBTAG00000052072 B3GNTL1 protein_coding
9 19 ARS-BFGL-NGS-29119 49744592 19 49768681 49927209 158529 - ENSBTAG00000015414 TBCD protein_coding
10 19 ARS-BFGL-NGS-28901 42338148 19 42283857 42299780 15924 - ENSBTAG00000001195 KCNH4 protein_coding
11 19 ARS-BFGL-NGS-28901 42338148 19 42302320 42303571 1252 - ENSBTAG00000000665 HCRT protein_coding
12 19 ARS-BFGL-NGS-28901 42338148 19 42307188 42312293 5106 - ENSBTAG00000000666 GHDC protein_coding
13 19 ARS-BFGL-NGS-28901 42338148 19 42319170 42357910 38741 - ENSBTAG00000010125 STAT5B protein_coding
14 19 ARS-BFGL-NGS-28901 42338148 19 42387164 42390885 3722 + ENSBTAG00000052763 NA lncRNA
15 22 ARS-BFGL-NGS-26419 46052082 22 45924535 46818511 893977 - ENSBTAG00000013117 CACNA2D3 protein_coding
16 22 ARS-BFGL-NGS-26419 46052082 22 46051998 46062657 10660 + ENSBTAG00000013124 LRTM1 protein_coding
17 22 ARS-BFGL-NGS-23411 5134078 22 5091037 5184321 93285 + ENSBTAG00000019832 TGFBR2 protein_coding
18 23 Hapmap54016-rs29022819 33609998 23 33618285 33618377 93 + ENSBTAG00000045279 SNORA70 snoRNA
19 26 BTB-00926636 17857480 26 17703169 17846407 143239 - ENSBTAG00000011743 TLL2 protein_coding
20 26 BTB-00926636 17857480 26 17851539 17912665 61127 - ENSBTAG00000016298 TM9SF3 protein_coding
21 26 BTA-62125-no-rs 17899619 26 17851539 17912665 61127 - ENSBTAG00000016298 TM9SF3 protein_coding
22 26 BTA-62125-no-rs 17899619 26 17925643 18029878 104236 - ENSBTAG00000019872 PIK3AP1 protein_coding
23 28 BTA-99379-no-rs 41666186 28 41609015 41649016 40002 - ENSBTAG00000007540 GLUD1 protein_coding
24 28 BTA-99379-no-rs 41666186 28 41650333 41763371 113039 + ENSBTAG00000019194 SHLD2 protein_coding
25 28 Hapmap43005-BTA-64606 45805105 28 45757443 45768564 11122 + ENSBTAG00000012393 AGT protein_coding
26 28 Hapmap43005-BTA-64606 45805105 28 45772443 45814853 42411 - ENSBTAG00000020314 COG2 protein_coding

FOR QTLs

X CHR SNP BP QTL_type start_pos end_pos QTL_ID
1 3 ARS-BFGL-NGS-4585 107160292 Reproduction 107191525 107191529 40198
2 3 ARS-BFGL-NGS-4585 107160292 Exterior 107191525 107191529 40199
3 3 ARS-BFGL-NGS-4585 107160292 Reproduction 107191525 107191529 40200
4 3 ARS-BFGL-NGS-4585 107160292 Exterior 107191525 107191529 40201
5 3 ARS-BFGL-NGS-4585 107160292 Milk 107191525 107191529 40202
6 3 ARS-BFGL-NGS-4585 107160292 Milk 107191525 107191529 40203
7 3 ARS-BFGL-NGS-4585 107160292 Production 107191525 107191529 40204
8 3 ARS-BFGL-NGS-4585 107160292 Production 107191525 107191529 40205
9 3 ARS-BFGL-NGS-4585 107160292 Milk 107191525 107191529 40206
10 3 ARS-BFGL-NGS-4585 107160292 Milk 107191525 107191529 40207
11 3 ARS-BFGL-NGS-4585 107160292 Exterior 107191525 107191529 40208
12 3 ARS-BFGL-NGS-4585 107160292 Reproduction 107191525 107191529 40209
13 3 ARS-BFGL-NGS-4585 107160292 Health 107191525 107191529 40210
14 3 ARS-BFGL-NGS-4585 107160292 Reproduction 107191525 107191529 40211
15 3 ARS-BFGL-NGS-4585 107160292 Exterior 107191525 107191529 40212
16 3 ARS-BFGL-NGS-4585 107160292 Exterior 107191525 107191529 40213
17 11 BTA-122098-no-rs 31882807 Milk 31870044 31870048 122275
18 14 Hapmap39823-BTA-35254 64061208 Milk 64013785 64013789 247433
19 14 Hapmap39823-BTA-35254 64061208 Milk 64013858 64013862 241580
20 14 Hapmap39823-BTA-35254 64061208 Milk 64013858 64013862 252031
21 14 Hapmap39823-BTA-35254 64061208 Milk 64013897 64013901 241079
22 14 Hapmap39823-BTA-35254 64061208 Milk 64013897 64013901 251434
23 14 Hapmap39823-BTA-35254 64061208 Milk 64013952 64013956 241853
24 14 Hapmap39823-BTA-35254 64061208 Milk 64013952 64013956 252403
25 14 Hapmap39823-BTA-35254 64061208 Milk 64015726 64015730 105628
26 14 Hapmap39823-BTA-35254 64061208 Milk 64017424 64017428 252151
27 14 Hapmap39823-BTA-35254 64061208 Milk 64017970 64017974 18873
28 14 Hapmap39823-BTA-35254 64061208 Milk 64061206 64061210 18872
29 14 Hapmap39823-BTA-35254 64061208 Milk 64061206 64061210 105335
30 14 Hapmap39823-BTA-35254 64061208 Milk 64061206 64061210 173792
31 14 Hapmap39823-BTA-35254 64061208 Milk 64061206 64061210 175101
32 14 Hapmap39823-BTA-35254 64061208 Milk 64061206 64061210 176088
33 14 Hapmap39823-BTA-35254 64061208 Milk 64061206 64061210 176487
34 14 Hapmap39823-BTA-35254 64061208 Milk 64061206 64061210 252723
35 14 Hapmap39823-BTA-35254 64061208 Milk 64063422 64063426 175102
36 14 Hapmap39823-BTA-35254 64061208 Milk 64068329 64068333 105336
37 14 Hapmap39823-BTA-35254 64061208 Milk 64081490 64081494 104637
38 14 Hapmap39823-BTA-35254 64061208 Milk 64081490 64081494 104890
39 14 Hapmap39823-BTA-35254 64061208 Health 64081490 64081494 179830
40 14 Hapmap39823-BTA-35254 64061208 Milk 64087234 64087238 175103
41 14 Hapmap39823-BTA-35254 64061208 Milk 64087234 64087238 252869
42 14 Hapmap39823-BTA-35254 64061208 Milk 64087450 64087454 252628
43 14 Hapmap39823-BTA-35254 64061208 Milk 64088569 64088573 252926
44 14 Hapmap39823-BTA-35254 64061208 Milk 64103583 64103587 104638
45 14 Hapmap39823-BTA-35254 64061208 Milk 64103583 64103587 104891
46 14 UA-IFASA-7664 64299714 Milk 64272869 64272873 102476
47 14 UA-IFASA-7664 64299714 Milk 64272869 64272873 104484
48 14 UA-IFASA-7664 64299714 Milk 64272869 64272873 104863
49 14 UA-IFASA-7664 64299714 Health 64272869 64272873 179764
50 14 UA-IFASA-7664 64299714 Milk 64283263 64283267 175108
51 14 UA-IFASA-7664 64299714 Milk 64283263 64283267 251800
52 14 UA-IFASA-7664 64299714 Milk 64283947 64283951 251398
53 14 UA-IFASA-7664 64299714 Milk 64288968 64288972 247633
54 14 UA-IFASA-7664 64299714 Milk 64288968 64288972 252142
55 14 UA-IFASA-7664 64299714 Milk 64299712 64299716 18875
56 14 UA-IFASA-7664 64299714 Milk 64299712 64299716 105660
57 14 UA-IFASA-7664 64299714 Milk 64299712 64299716 158137
58 14 UA-IFASA-7664 64299714 Milk 64299712 64299716 173793
59 14 UA-IFASA-7664 64299714 Milk 64299712 64299716 175109
60 14 UA-IFASA-7664 64299714 Milk 64299712 64299716 176085
61 14 UA-IFASA-7664 64299714 Milk 64299712 64299716 176486
62 14 UA-IFASA-7664 64299714 Milk 64299712 64299716 250009
63 14 UA-IFASA-7664 64299714 Milk 64299712 64299716 252724
64 14 UA-IFASA-7664 64299714 Milk 64302757 64302761 251934
65 14 UA-IFASA-7664 64299714 Milk 64308608 64308612 252948
66 14 UA-IFASA-7664 64299714 Milk 64317001 64317005 252652
67 14 UA-IFASA-7664 64299714 Milk 64322496 64322500 251752
68 14 UA-IFASA-7664 64299714 Meat_and_Carcass 64327478 64327482 20269
69 14 UA-IFASA-7664 64299714 Meat_and_Carcass 64327478 64327482 20311
70 14 UA-IFASA-7664 64299714 Meat_and_Carcass 64327478 64327482 20393
71 14 UA-IFASA-7664 64299714 Meat_and_Carcass 64327478 64327482 20464
72 14 UA-IFASA-7664 64299714 Milk 64327904 64327908 32596
73 14 UA-IFASA-7664 64299714 Milk 64327904 64327908 32634
74 14 UA-IFASA-7664 64299714 Milk 64328322 64328326 252206
75 14 UA-IFASA-7664 64299714 Milk 64339118 64339122 251497
76 14 UA-IFASA-7664 64299714 Milk 64343288 64343292 252287
77 19 ARS-BFGL-NGS-28901 42338148 Milk 42292941 42292945 147657
78 19 ARS-BFGL-NGS-28901 42338148 Milk 42292941 42292945 147675
79 19 ARS-BFGL-NGS-28901 42338148 Milk 42322679 42322683 173073
80 19 ARS-BFGL-NGS-28901 42338148 Milk 42322679 42322683 173074
81 19 ARS-BFGL-NGS-28901 42338148 Milk 42322679 42322683 173075
82 19 ARS-BFGL-NGS-28901 42338148 Milk 42322679 42322683 173076
83 19 ARS-BFGL-NGS-28901 42338148 Milk 42322679 42322683 173077
84 19 ARS-BFGL-NGS-28901 42338148 Milk 42322679 42322683 173237
85 19 ARS-BFGL-NGS-28901 42338148 Milk 42338146 42338150 166545
86 19 ARS-BFGL-NGS-28901 42338148 Milk 42338146 42338150 166546
87 19 ARS-BFGL-NGS-28901 42338148 Milk 42338146 42338150 253605
88 19 ARS-BFGL-NGS-28901 42338148 Milk 42349650 42349654 147630
89 19 ARS-BFGL-NGS-29119 49744592 Milk 49702578 49702582 195323
90 19 ARS-BFGL-NGS-29119 49744592 Milk 49702578 49702582 195324
91 19 ARS-BFGL-NGS-29119 49744592 Milk 49702578 49702582 195325
92 19 ARS-BFGL-NGS-29119 49744592 Milk 49702578 49702582 205408
93 19 ARS-BFGL-NGS-29119 49744592 Meat_and_Carcass 49715846 49715850 107754
94 19 ARS-BFGL-NGS-29119 49744592 Milk 49735877 49735881 196362
95 19 ARS-BFGL-NGS-29119 49744592 Milk 49772086 49772090 195342
96 20 BTB-01471673 11545685 Production 11498161 11498165 65554
97 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5109498 5109502 151609
98 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5134076 5134080 151620
99 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5173197 5173201 151601
100 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51802
101 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51803
102 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51804
103 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51805
104 22 ARS-BFGL-NGS-26419 46052082 Production 46052080 46052084 51806
105 22 ARS-BFGL-NGS-26419 46052082 Production 46052080 46052084 51807
106 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51808
107 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51809
108 22 ARS-BFGL-NGS-26419 46052082 Health 46052080 46052084 51810
109 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51811
110 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51812
111 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51813
112 22 ARS-BFGL-NGS-26419 46052082 Milk 46073517 46073521 243365
113 26 BTB-00926636 17857480 Milk 17809768 17809772 195264
114 26 BTB-00926636 17857480 Milk 17809768 17809772 200269
115 26 BTB-00926636 17857480 Milk 17809768 17809772 200270
116 26 BTB-00926636 17857480 Milk 17809768 17809772 205295
117 26 BTB-00926636 17857480 Milk 17810363 17810367 198859
118 26 BTB-00926636 17857480 Milk 17810363 17810367 203868
119 26 BTB-00926636 17857480 Milk 17810363 17810367 203869
120 26 BTB-00926636 17857480 Milk 17810363 17810367 210297
121 26 BTB-00926636 17857480 Milk 17811021 17811025 198858
122 26 BTB-00926636 17857480 Milk 17811021 17811025 203866
123 26 BTB-00926636 17857480 Milk 17811021 17811025 203867
124 26 BTB-00926636 17857480 Milk 17811021 17811025 210296
125 26 BTB-00926636 17857480 Milk 17811725 17811729 197493
126 26 BTB-00926636 17857480 Milk 17811725 17811729 202704
127 26 BTB-00926636 17857480 Milk 17812273 17812277 195685
128 26 BTB-00926636 17857480 Milk 17812273 17812277 200785
129 26 BTB-00926636 17857480 Milk 17813212 17813216 195285
130 26 BTB-00926636 17857480 Milk 17813212 17813216 200301
131 26 BTB-00926636 17857480 Milk 17813978 17813982 196236
132 26 BTB-00926636 17857480 Milk 17813978 17813982 201468
133 26 BTB-00926636 17857480 Milk 17815237 17815241 198002
134 26 BTB-00926636 17857480 Milk 17815237 17815241 203190
135 26 BTB-00926636 17857480 Milk 17817223 17817227 34750
136 26 BTB-00926636 17857480 Milk 17817223 17817227 195586
137 26 BTB-00926636 17857480 Milk 17821297 17821301 197235
138 26 BTB-00926636 17857480 Milk 17821297 17821301 202455
139 26 BTB-00926636 17857480 Milk 17822209 17822213 197783
140 26 BTB-00926636 17857480 Milk 17822209 17822213 202983
141 26 BTB-00926636 17857480 Milk 17823105 17823109 196204
142 26 BTB-00926636 17857480 Milk 17823105 17823109 201419
143 26 BTB-00926636 17857480 Milk 17824770 17824774 196867
144 26 BTB-00926636 17857480 Milk 17824770 17824774 202101
145 26 BTB-00926636 17857480 Milk 17833551 17833555 195748
146 26 BTB-00926636 17857480 Milk 17833551 17833555 200855
147 26 BTB-00926636 17857480 Milk 17833551 17833555 206213
148 26 BTB-00926636 17857480 Milk 17865069 17865073 197144
149 26 BTA-62125-no-rs 17899619 Milk 17865069 17865073 197144
150 26 BTB-00926636 17857480 Milk 17865069 17865073 202367
151 26 BTA-62125-no-rs 17899619 Milk 17865069 17865073 202367
152 26 BTB-00926636 17857480 Milk 17865069 17865073 208485
153 26 BTA-62125-no-rs 17899619 Milk 17865069 17865073 208485
154 26 BTB-00926636 17857480 Milk 17871201 17871205 197611
155 26 BTA-62125-no-rs 17899619 Milk 17871201 17871205 197611
156 26 BTB-00926636 17857480 Milk 17871201 17871205 202816
157 26 BTA-62125-no-rs 17899619 Milk 17871201 17871205 202816
158 26 BTB-00926636 17857480 Milk 17871201 17871205 209076
159 26 BTA-62125-no-rs 17899619 Milk 17871201 17871205 209076
160 26 BTB-00926636 17857480 Milk 17885871 17885875 63656
161 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 63656
162 26 BTB-00926636 17857480 Milk 17885871 17885875 199068
163 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 199068
164 26 BTB-00926636 17857480 Milk 17885871 17885875 204113
165 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 204113
166 26 BTB-00926636 17857480 Milk 17885871 17885875 204114
167 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 204114
168 26 BTB-00926636 17857480 Milk 17885871 17885875 210399
169 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 210399
170 26 BTB-00926636 17857480 Milk 17895273 17895277 63657
171 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 63657
172 26 BTB-00926636 17857480 Milk 17895273 17895277 199069
173 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 199069
174 26 BTB-00926636 17857480 Milk 17895273 17895277 204115
175 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 204115
176 26 BTB-00926636 17857480 Milk 17895273 17895277 204116
177 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 204116
178 26 BTB-00926636 17857480 Milk 17895273 17895277 210400
179 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 210400
180 26 BTB-00926636 17857480 Milk 17895803 17895807 199070
181 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 199070
182 26 BTB-00926636 17857480 Milk 17895803 17895807 204117
183 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 204117
184 26 BTB-00926636 17857480 Milk 17895803 17895807 204118
185 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 204118
186 26 BTB-00926636 17857480 Milk 17895803 17895807 210401
187 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 210401
188 26 BTB-00926636 17857480 Milk 17900304 17900308 199071
189 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 199071
190 26 BTB-00926636 17857480 Milk 17900304 17900308 204119
191 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 204119
192 26 BTB-00926636 17857480 Milk 17900304 17900308 204120
193 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 204120
194 26 BTB-00926636 17857480 Milk 17900304 17900308 210402
195 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 210402
196 26 BTB-00926636 17857480 Milk 17902932 17902936 63658
197 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 63658
198 26 BTB-00926636 17857480 Milk 17902932 17902936 199072
199 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 199072
200 26 BTB-00926636 17857480 Milk 17902932 17902936 204121
201 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 204121
202 26 BTB-00926636 17857480 Milk 17902932 17902936 204122
203 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 204122
204 26 BTB-00926636 17857480 Milk 17902932 17902936 210403
205 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 210403
206 26 BTB-00926636 17857480 Milk 17906861 17906865 63660
207 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 63660
208 26 BTB-00926636 17857480 Milk 17906861 17906865 199074
209 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 199074
210 26 BTB-00926636 17857480 Milk 17906861 17906865 204125
211 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 204125
212 26 BTB-00926636 17857480 Milk 17906861 17906865 204126
213 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 204126
214 26 BTB-00926636 17857480 Milk 17906861 17906865 210405
215 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 210405
216 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 63654
217 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 199076
218 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 204129
219 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 204130
220 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 210407
221 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 63655
222 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 197826
223 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 203026
224 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 203027
225 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 209354
226 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 195860
227 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 201008
228 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 201009
229 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 206462
230 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 33984
231 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 195668
232 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 195669
233 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 195670
234 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 200758
235 26 BTA-62125-no-rs 17899619 Milk 17931209 17931213 199092
236 26 BTA-62125-no-rs 17899619 Milk 17931209 17931213 204151
237 28 BTA-99379-no-rs 41666186 Reproduction 41646434 41646438 107048
238 28 BTA-99379-no-rs 41666186 Reproduction 41646434 41646438 107227
239 28 ARS-BFGL-NGS-11935 44942879 Reproduction 44971508 44971512 238444
240 28 ARS-BFGL-NGS-11935 44942879 Production 44981682 44981686 66703
241 28 ARS-BFGL-NGS-11935 44942879 Production 44981682 44981686 66704
242 28 ARS-BFGL-NGS-11935 44942879 Production 44981682 44981686 69429
243 28 Hapmap43005-BTA-64606 45805105 Milk 45811156 45811160 34467

20.1 QTL annotation on GALLO

QTL type

#GALLO
par(mar=c(8,20,8,8))
plot_qtl_info(out.qtl, qtl_plot = "qtl_type", cex=1)

My Image

QTL type

#GALLO
par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Reproduction")

par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Health")

Reproduction My Image

Health My Image

20.2 QTL enrichment on GALLO

#QTL enrichment analysis 
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "Name",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)


# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]

write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_enrich_qtl_genome_name_15.csv")

out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "QTL_type",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)

sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_enrich_qtl_genome_type_15.csv")

#Plots

#Name

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")


#Type

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")

QTL Enrichment outcomes

Enrichment by name (enrichment analysis will be performed for each trait individually)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval QTL_type
15 Milk C14 index 35 4437 209 163224 0.0000000 0.0000000 Milk
28 Milk myristoleic acid content 26 3047 209 163224 0.0000000 0.0000000 Milk
31 Milk protein percentage 39 8803 209 163224 0.0000000 0.0000000 Milk
29 Milk palmitoleic acid content 15 2422 209 163224 0.0000007 0.0000079 Milk
16 Milk C16 index 12 2002 209 163224 0.0000129 0.0001159 Milk
36 Muscle carnosine content 3 67 209 163224 0.0000933 0.0006998 Meat and Carcass
23 Milk lactose content 3 105 209 163224 0.0003523 0.0022648 Milk
20 Milk fat content 2 92 209 163224 0.0063319 0.0356168 Milk
27 Milk myristic acid content 4 902 209 163224 0.0294859 0.1449683 Milk
33 Milk stearic acid content 2 218 209 163224 0.0322152 0.1449683 Milk
30 Milk pentadecylic acid content 2 280 209 163224 0.0505491 0.2067917 Milk
44 Teat length 2 300 209 163224 0.0570974 0.2141153 Exterior
14 Milk arachidic acid content 1 60 209 163224 0.0740084 0.2220251 Milk
25 Milk margaric acid content 1 59 209 163224 0.0728207 0.2220251 Milk
26 Milk mid-infrared spectra 1 55 209 163224 0.0680550 0.2220251 Milk
8 Ketosis 2 441 209 163224 0.1101333 0.2915293 Health
40 Rear leg placement - side view 2 430 209 163224 0.1056354 0.2915293 Exterior
39 Pregnancy rate 3 944 209 163224 0.1218012 0.3045029 Reproduction
37 Myristic acid content 1 120 209 163224 0.1425637 0.3376510 Meat and Carcass
6 Fat thickness at the 12th rib 1 169 209 163224 0.1947851 0.4382665 Meat and Carcass
7 Foot angle 2 672 209 163224 0.2129475 0.4563160 Exterior
13 Maturity rate 1 243 209 163224 0.2677107 0.5072331 Production
17 Milk C18 index 1 246 209 163224 0.2705243 0.5072331 Milk
42 Stillbirth 3 1363 209 163224 0.2544359 0.5072331 Reproduction
18 Milk capric acid content 2 912 209 163224 0.3259016 0.5640605 Milk
38 Net merit 2 903 209 163224 0.3216964 0.5640605 Production
24 Milk lauric acid content 1 366 209 163224 0.3746664 0.6021425 Milk
32 Milk protein yield 5 3093 209 163224 0.3633449 0.6021425 Milk
41 Somatic cell score 2 1122 209 163224 0.4213659 0.6538437 Health
5 Dairy form 1 514 209 163224 0.4829472 0.7244208 Exterior
3 Calving ease 5 3819 209 163224 0.5419946 0.7725995 Reproduction
9 Lean meat yield 1 621 209 163224 0.5494041 0.7725995 Meat and Carcass
43 Strength 1 664 209 163224 0.5736508 0.7813075 Exterior
45 Udder depth 1 695 209 163224 0.5903212 0.7813075 Exterior
19 Milk caprylic acid content 1 811 209 163224 0.6471450 0.8320436 Milk
10 Length of productive life 2 2004 209 163224 0.7280866 0.8855108 Production
35 Milking speed 1 1011 209 163224 0.7273020 0.8855108 Milk
2 Body weight gain 1 1354 209 163224 0.8248432 0.9440581 Production
4 Conception rate 1 1255 209 163224 0.8009513 0.9440581 Reproduction
11 Longissimus muscle area 1 1420 209 163224 0.8391628 0.9440581 Meat and Carcass
12 Marbling score 1 1817 209 163224 0.9037804 0.9830306 Meat and Carcass
34 Milk yield 5 6432 209 163224 0.9174952 0.9830306 Milk
1 Body weight 2 4289 209 163224 0.9746418 0.9967928 Production
21 Milk fat percentage 8 10941 209 163224 0.9726277 0.9967928 Milk
22 Milk fat yield 1 8220 209 163224 0.9999797 0.9999797 Milk

My Image

Enrichment by QTL_type (enrichment processes performed for the QTL classes)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval
1 Exterior 9 9077 209 163224 0.8264587 1
2 Health 4 5889 209 163224 0.9456284 1
3 Meat and Carcass 8 18258 209 163224 0.9999640 1
4 Milk 168 75352 209 163224 0.0000000 0
5 Production 8 19640 209 163224 0.9999916 1
6 Reproduction 12 35008 209 163224 1.0000000 1

My Image

21 GALLO - with Birth_Year and Sampling_Date

# GALLO

#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")

#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")

#import MARKER files = the GWAS output
gwas <- read.csv(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/SNP_sig_Bonf_ind_seg.csv")

# Assuming "gwas" is your dataframe
gwas <- subset(gwas, select = c(Chr, SNP, bp))


colnames(gwas) <- c("CHR","SNP", "BP")


#FINDING GENES AND QTLs ARROUND THE MARKER

#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2, 
                                            marker_file= gwas, 
                                            method = "gene",
                                            marker = "snp", 
                                            interval = 50000, 
                                            nThreads = NULL)

write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_genes2.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_genes2.csv")

#FINDING QTLs

out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2, 
                                          marker_file= gwas, 
                                          method = "qtl",
                                          marker = "snp", 
                                          interval = 50000, 
                                          nThreads = NULL)


write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_qtl2.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_qtl_clean2.csv")

Dowloading the .gtf file from Ensembl https://useast.ensembl.org/info/data/ftp/index.html

Downloading the .gff file from AnimalQTLdb https://www.animalgenome.org/cgi-bin/QTLdb/index

The GALLO output are bellow:

For GENES

X CHR SNP BP chr start_pos end_pos width strand gene_id gene_name gene_biotype
1 22 ARS-BFGL-NGS-26419 46052082 22 45924535 46818511 893977 - ENSBTAG00000013117 CACNA2D3 protein_coding
2 22 ARS-BFGL-NGS-26419 46052082 22 46051998 46062657 10660 + ENSBTAG00000013124 LRTM1 protein_coding
3 22 ARS-BFGL-NGS-23411 5134078 22 5091037 5184321 93285 + ENSBTAG00000019832 TGFBR2 protein_coding
4 26 BTB-00926636 17857480 26 17703169 17846407 143239 - ENSBTAG00000011743 TLL2 protein_coding
5 26 BTB-00926636 17857480 26 17851539 17912665 61127 - ENSBTAG00000016298 TM9SF3 protein_coding
6 26 BTA-62125-no-rs 17899619 26 17851539 17912665 61127 - ENSBTAG00000016298 TM9SF3 protein_coding
7 26 BTA-62125-no-rs 17899619 26 17925643 18029878 104236 - ENSBTAG00000019872 PIK3AP1 protein_coding
8 28 BTA-99379-no-rs 41666186 28 41609015 41649016 40002 - ENSBTAG00000007540 GLUD1 protein_coding
9 28 BTA-99379-no-rs 41666186 28 41650333 41763371 113039 + ENSBTAG00000019194 SHLD2 protein_coding

FOR QTLs

X CHR SNP BP QTL_type start_pos end_pos QTL_ID
1 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5109498 5109502 151609
2 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5134076 5134080 151620
3 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5173197 5173201 151601
4 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51802
5 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51803
6 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51804
7 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51805
8 22 ARS-BFGL-NGS-26419 46052082 Production 46052080 46052084 51806
9 22 ARS-BFGL-NGS-26419 46052082 Production 46052080 46052084 51807
10 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51808
11 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51809
12 22 ARS-BFGL-NGS-26419 46052082 Health 46052080 46052084 51810
13 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51811
14 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51812
15 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51813
16 22 ARS-BFGL-NGS-26419 46052082 Milk 46073517 46073521 243365
17 26 BTB-00926636 17857480 Milk 17809768 17809772 195264
18 26 BTB-00926636 17857480 Milk 17809768 17809772 200269
19 26 BTB-00926636 17857480 Milk 17809768 17809772 200270
20 26 BTB-00926636 17857480 Milk 17809768 17809772 205295
21 26 BTB-00926636 17857480 Milk 17810363 17810367 198859
22 26 BTB-00926636 17857480 Milk 17810363 17810367 203868
23 26 BTB-00926636 17857480 Milk 17810363 17810367 203869
24 26 BTB-00926636 17857480 Milk 17810363 17810367 210297
25 26 BTB-00926636 17857480 Milk 17811021 17811025 198858
26 26 BTB-00926636 17857480 Milk 17811021 17811025 203866
27 26 BTB-00926636 17857480 Milk 17811021 17811025 203867
28 26 BTB-00926636 17857480 Milk 17811021 17811025 210296
29 26 BTB-00926636 17857480 Milk 17811725 17811729 197493
30 26 BTB-00926636 17857480 Milk 17811725 17811729 202704
31 26 BTB-00926636 17857480 Milk 17812273 17812277 195685
32 26 BTB-00926636 17857480 Milk 17812273 17812277 200785
33 26 BTB-00926636 17857480 Milk 17813212 17813216 195285
34 26 BTB-00926636 17857480 Milk 17813212 17813216 200301
35 26 BTB-00926636 17857480 Milk 17813978 17813982 196236
36 26 BTB-00926636 17857480 Milk 17813978 17813982 201468
37 26 BTB-00926636 17857480 Milk 17815237 17815241 198002
38 26 BTB-00926636 17857480 Milk 17815237 17815241 203190
39 26 BTB-00926636 17857480 Milk 17817223 17817227 34750
40 26 BTB-00926636 17857480 Milk 17817223 17817227 195586
41 26 BTB-00926636 17857480 Milk 17821297 17821301 197235
42 26 BTB-00926636 17857480 Milk 17821297 17821301 202455
43 26 BTB-00926636 17857480 Milk 17822209 17822213 197783
44 26 BTB-00926636 17857480 Milk 17822209 17822213 202983
45 26 BTB-00926636 17857480 Milk 17823105 17823109 196204
46 26 BTB-00926636 17857480 Milk 17823105 17823109 201419
47 26 BTB-00926636 17857480 Milk 17824770 17824774 196867
48 26 BTB-00926636 17857480 Milk 17824770 17824774 202101
49 26 BTB-00926636 17857480 Milk 17833551 17833555 195748
50 26 BTB-00926636 17857480 Milk 17833551 17833555 200855
51 26 BTB-00926636 17857480 Milk 17833551 17833555 206213
52 26 BTB-00926636 17857480 Milk 17865069 17865073 197144
53 26 BTA-62125-no-rs 17899619 Milk 17865069 17865073 197144
54 26 BTB-00926636 17857480 Milk 17865069 17865073 202367
55 26 BTA-62125-no-rs 17899619 Milk 17865069 17865073 202367
56 26 BTB-00926636 17857480 Milk 17865069 17865073 208485
57 26 BTA-62125-no-rs 17899619 Milk 17865069 17865073 208485
58 26 BTB-00926636 17857480 Milk 17871201 17871205 197611
59 26 BTA-62125-no-rs 17899619 Milk 17871201 17871205 197611
60 26 BTB-00926636 17857480 Milk 17871201 17871205 202816
61 26 BTA-62125-no-rs 17899619 Milk 17871201 17871205 202816
62 26 BTB-00926636 17857480 Milk 17871201 17871205 209076
63 26 BTA-62125-no-rs 17899619 Milk 17871201 17871205 209076
64 26 BTB-00926636 17857480 Milk 17885871 17885875 63656
65 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 63656
66 26 BTB-00926636 17857480 Milk 17885871 17885875 199068
67 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 199068
68 26 BTB-00926636 17857480 Milk 17885871 17885875 204113
69 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 204113
70 26 BTB-00926636 17857480 Milk 17885871 17885875 204114
71 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 204114
72 26 BTB-00926636 17857480 Milk 17885871 17885875 210399
73 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 210399
74 26 BTB-00926636 17857480 Milk 17895273 17895277 63657
75 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 63657
76 26 BTB-00926636 17857480 Milk 17895273 17895277 199069
77 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 199069
78 26 BTB-00926636 17857480 Milk 17895273 17895277 204115
79 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 204115
80 26 BTB-00926636 17857480 Milk 17895273 17895277 204116
81 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 204116
82 26 BTB-00926636 17857480 Milk 17895273 17895277 210400
83 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 210400
84 26 BTB-00926636 17857480 Milk 17895803 17895807 199070
85 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 199070
86 26 BTB-00926636 17857480 Milk 17895803 17895807 204117
87 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 204117
88 26 BTB-00926636 17857480 Milk 17895803 17895807 204118
89 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 204118
90 26 BTB-00926636 17857480 Milk 17895803 17895807 210401
91 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 210401
92 26 BTB-00926636 17857480 Milk 17900304 17900308 199071
93 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 199071
94 26 BTB-00926636 17857480 Milk 17900304 17900308 204119
95 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 204119
96 26 BTB-00926636 17857480 Milk 17900304 17900308 204120
97 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 204120
98 26 BTB-00926636 17857480 Milk 17900304 17900308 210402
99 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 210402
100 26 BTB-00926636 17857480 Milk 17902932 17902936 63658
101 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 63658
102 26 BTB-00926636 17857480 Milk 17902932 17902936 199072
103 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 199072
104 26 BTB-00926636 17857480 Milk 17902932 17902936 204121
105 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 204121
106 26 BTB-00926636 17857480 Milk 17902932 17902936 204122
107 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 204122
108 26 BTB-00926636 17857480 Milk 17902932 17902936 210403
109 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 210403
110 26 BTB-00926636 17857480 Milk 17906861 17906865 63660
111 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 63660
112 26 BTB-00926636 17857480 Milk 17906861 17906865 199074
113 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 199074
114 26 BTB-00926636 17857480 Milk 17906861 17906865 204125
115 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 204125
116 26 BTB-00926636 17857480 Milk 17906861 17906865 204126
117 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 204126
118 26 BTB-00926636 17857480 Milk 17906861 17906865 210405
119 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 210405
120 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 63654
121 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 199076
122 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 204129
123 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 204130
124 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 210407
125 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 63655
126 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 197826
127 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 203026
128 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 203027
129 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 209354
130 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 195860
131 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 201008
132 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 201009
133 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 206462
134 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 33984
135 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 195668
136 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 195669
137 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 195670
138 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 200758
139 26 BTA-62125-no-rs 17899619 Milk 17931209 17931213 199092
140 26 BTA-62125-no-rs 17899619 Milk 17931209 17931213 204151
141 28 BTA-99379-no-rs 41666186 Reproduction 41646434 41646438 107048
142 28 BTA-99379-no-rs 41666186 Reproduction 41646434 41646438 107227

21.1 QTL annotation on GALLO

QTL type

#GALLO
par(mar=c(8,20,8,8))
plot_qtl_info(out.qtl, qtl_plot = "qtl_type", cex=1)

My Image

QTL type

#GALLO
par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Reproduction")

par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Health")

Reproduction My Image

Health My Image

21.2 QTL enrichment on GALLO

#QTL enrichment analysis 
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "Name",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)


# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]

write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_enrich_qtl_genome_name_2.csv")

out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "QTL_type",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)

sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_enrich_qtl_genome_type_2.csv")


#Plots

#Name

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")


#Type

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")

QTL Enrichment outcomes

Enrichment by name (enrichment analysis will be performed for each trait individually)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval QTL_type
5 Milk C14 index 35 4437 108 163224 0.0000000 0.0000000 Milk
9 Milk myristoleic acid content 26 3047 108 163224 0.0000000 0.0000000 Milk
10 Milk palmitoleic acid content 15 2422 108 163224 0.0000000 0.0000000 Milk
6 Milk C16 index 12 2002 108 163224 0.0000000 0.0000001 Milk
12 Muscle carnosine content 3 67 108 163224 0.0000131 0.0000497 Meat and Carcass
14 Pregnancy rate 2 944 108 163224 0.1296627 0.4105985 Reproduction
18 Teat length 1 300 108 163224 0.1802436 0.4892327 Exterior
15 Rear leg placement - side view 1 430 108 163224 0.2479752 0.5235032 Exterior
17 Stillbirth 2 1363 108 163224 0.2280294 0.5235032 Reproduction
3 Foot angle 1 672 108 163224 0.3596272 0.6162877 Exterior
7 Milk capric acid content 1 912 108 163224 0.4541067 0.6162877 Milk
8 Milk myristic acid content 1 902 108 163224 0.4504612 0.6162877 Milk
13 Net merit 1 903 108 163224 0.4508269 0.6162877 Production
19 Udder depth 1 695 108 163224 0.3693423 0.6162877 Exterior
16 Somatic cell score 1 1122 108 163224 0.5253603 0.6654564 Health
2 Conception rate 1 1255 108 163224 0.5656375 0.6716945 Reproduction
1 Calving ease 2 3819 108 163224 0.7219243 0.7776744 Reproduction
4 Length of productive life 1 2004 108 163224 0.7367441 0.7776744 Production
11 Milk yield 1 6432 108 163224 0.9870080 0.9870080 Milk

My Image

Enrichment by QTL_type (enrichment processes performed for the QTL classes)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval
1 Exterior 4 9077 108 163224 0.8569775 0.9999953
2 Health 1 5889 108 163224 0.9811250 0.9999953
3 Meat and Carcass 3 18258 108 163224 0.9997109 0.9999953
4 Milk 91 75352 108 163224 0.0000000 0.0000000
5 Production 2 19640 108 163224 0.9999848 0.9999953
6 Reproduction 7 35008 108 163224 0.9999953 0.9999953

My Image

22 GPROFILER ON-LINE with BY and SD

From the online version of GPROFILER i got the following results.

Legend My Image

My Image

My Image

23 Haplotype Blocks

First I need to generate the .ped file

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/inter_file

plink \
    --bfile ${DATA} \
    --cow \
    --recode \
    --out ${RESULT}

Second, is necessary to convert the .ped file to Linkage format:

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/inter_file
RESULT=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/hapblock_in

plink \
    --file ${DATA} \
    --cow \
    --recode HV \
    --out ${RESULT}

The code above will generate a pair of files .ped and .info for each chromosome

I ran the haploview only for chromosome 03, 11, 14, 19, 20, 22, 23, 26 and 28 that are when the significant SNP are located.

As the LD Plot bring the SNP ordered, first I looked at /home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg_15.csv, to know the SNP position, and then compare with the Check Markers tab in the haploview SNP position x SNP# in plot

My Image

VEP rs SNP Chr bp A1 A2 Freq Haploview.Order Block
yes rs109888380 ARS-BFGL-NGS-4585 3 107160292 A C 0.3353170 1818
yes rs41624254 BTA-122098-no-rs 11 31882807 A C 0.4444440 674
no rs42217767 BTB-01060598 11 32301594 C A 0.4503970 680
yes rs41632193 Hapmap39823-BTA-35254 14 64061208 A C 0.2916670 1196
yes rs41632223 UA-IFASA-7664 14 64299714 C A 0.2678570 1200
yes rs109751680 ARS-BFGL-NGS-28901 19 42338148 C A 0.3710320 713
yes rs41577586 ARS-BFGL-NGS-29119 19 49744592 A C 0.4047620 849 Block 38
yes rs42590312 BTB-01471673 20 11545685 A C 0.2460320 232
yes rs110031217 ARS-BFGL-NGS-23411 22 5134078 A C 0.1865080 88
yes rs110991998 ARS-BFGL-NGS-26419 22 46052082 A C 0.0813492 762
no rs29022819 Hapmap54016-rs29022819 23 33609998 C A 0.1904760 534
yes rs41644634 BTA-62125-no-rs 26 17899619 C A 0.0257937 297 Block 10
yes rs42089058 BTB-00926636 26 17857480 C A 0.0257937 296 Block 10
yes rs109578343 ARS-BFGL-NGS-11935 28 44942879 A C 0.0297619 773
yes rs41567074 BTA-99379-no-rs 28 41666186 A C 0.0496032 701
yes rs41648479 Hapmap43005-BTA-64606 28 45805105 A C 0.0297619 791 Block 21

As can be observed in the column “Block”,four significant SNPs fell within a Haploblock.

rs41577586 (ARS-BFGL-NGS-29119) - chr 19 - order 849 - Block 38 rs41644634 (BTA-62125-no-rs) - chr 26 - order 297 - Block 10 rs42089058 (BTB-00926636) - chr 26 - order 296 - Block 10 rs41648479 (Hapmap43005-BTA-64606) - chr 28 - order 791 - Block 21

Chr 3 (SNP 1818) My Image

Chr 11 (SNP 674 and 680) My Image

Chr 14 (SNP 1196 and 1200) My Image

Chr 19 (SNP 713) My Image

Chr 19 (SNP 849) My Image

Chr 20 (SNP 232) My Image

Chr 22 (SNP 88) My Image

Chr 22 (SNP 762) My Image

Chr 23 (SNP 534) My Image

Chr 26 (SNP 296 and 297) My Image

Chr 28 (SNP 701) My Image

Chr 28 (SNP 773 and 791) My Image

24 GWAS - EXTREME PHENO - WITH BY and SD

24.1 Data preparation

I received from Umesh a e-mail informing the three categories that the animals could be sorted based on their cortisol concentration.

My Image

pheno <- read.table("/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", header = T)
data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv", header = T)
ids_eq <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv", header = T)


# Create an matrix with fixed effects with only those animals which also have phenotype and genotype
data_final$cdn_id <- ids_eq$cdn_id[match(data_final$ID, ids_eq$elora_id)]
fixeff <- data_final[,c("ID", "BIRTH_YEAR", "Sampling_date", "cdn_id")]
fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id, ]
fixeff$FID <- "HO"
fixeff <- fixeff[, c("FID", "cdn_id", "BIRTH_YEAR", "Sampling_date")]

# Now we are gona remove the intermediary animals from pheno object
pheno$Categorical <- ifelse(pheno$cortisol >= 956, "H", 
                           ifelse(pheno$cortisol <= 190.8, "L", "M"))
table(pheno$Categorical)
pheno <- pheno[pheno$Categorical != "M", ]
pheno <- pheno[, c("FID", "cdn_id", "cortisol")]

# Now we are going to remove from fixeff the animals which are not in pheno
fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id,]

#Checking if match the animals and order
identical(fixeff$cdn_id, pheno$cdn_id)

#Creating a file with animals to keep in the genotype file, we will use it on Plink
to_keep_geno <- pheno[, c("FID", "cdn_id")]

write.table(fixeff, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/fixeff.txt", quote = F, row.names = F, col.names = T)
write.table(pheno, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/pheno.txt", quote = F, row.names = F, col.names = T)
write.table(to_keep_geno, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/to_keep_geno.txt", quote = F, row.names = F, col.names = F)

We ended up with
H (Hight) = 34 animals
L (Low) = 37 animals Total = 71 animals

On Plink we will remove all individuals from genotype files that are classified as Medium, keeping only the High and Low

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/geno_extreme
KEEP=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/to_keep_geno.txt

plink2 --bfile ${DATA} --keep ${KEEP} --chr-set 29 --make-bed --out ${RESULT}

24.2 GWAS on GCTA

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/geno_extreme
RESULT=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/GWAS_result
PHENO=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/pheno.txt
FIXEFF=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/fixeff.txt

/home/local/bin/gcta \
    --bfile ${DATA} \
    --mlma-loco \
    --pheno ${PHENO} \
    --qcovar ${FIXEFF} \
    --autosome-num 29 \
    --autosome \
    --out ${RESULT}

After ran the GWAS above I got the following message from the GCTA:

Error: analysis stopped because more than half of the variance components are constrained. The result would be unreliable. You may have a try of the option –reml-no-constrain.

As we got this error message, we needed to solve this problem, and for that we used the whole sample size (252 individuals) to estimate the variance components in GCTA, and after this, using this variance components from the whole sample we perfomed ths GWAS with the subset of individuals (34 High + 37 Low = 71)

24.3 BLUPF90+ GWAS

To run ssGWAS on Blupf90+ suite, we will need 4 different softwares:

  • renum: just to renumerate the files and generate the renf90.par
  • preGSF90: just to perfome a quality control with different parameter from the default.
  • blupf90+: used to estimate VCE and generate Ainv and Ginv
  • postGSF90: perform GWAS

The tutorial for preGSF90 and postGSF90 we can find in the link bellow https://nce.ads.uga.edu/wiki/doku.php?id=readme.pregsf90#gwas_options_postgsf90

According to the BLUPF90+ tutorial:

ssGWAS is an iterative procedure with 2 steps:
0) Quality control
1) prediction of GEBV with ssGBLUP
2) prediction of SNP marker effects based on the GEBV

24.3.1 Files preparation

Preparing files to run Variance components estimation using REML with AI (Average Information) algorithm.

First you need to create a directory in your home directory, prepare and save the following files in:

  • Phenotype and Fixed effects file
  • Pedigree file
  • Genotype file
  • BlupF90+ executable file
  • RenumF90 executable file
  • preGSf90 executable file
  • postGSf90 executable file
  • Parameter file

      24.3.1.1 Phenotype and Fixed effects file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID
      SECOND COLUMN = Phenotype
      THIRD COLUMN = Fixed Effect 1
      FOURTH COLUMN = Fixed Effect 2

      First we are going to generate a Phenotype_Fixed_Effect file with the whole sample (252 individuals) that we are going to use for the Variance Components Estimation.

      To get in one file these four columns we need the following code:

      fixeff <- read.table("/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/fixeff.txt", header = T)
      pheno <- read.table("/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", header = T)
      ids_eq <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv", header = T)
      
      fixeff <- fixeff[, c("cdn_id", "BIRTH_YEAR", "Sampling_date")]
      pheno <- pheno[,c("cdn_id", "cortisol")]
      
      # Load necessary libraries
      library(dplyr)
      
      # Merge pheno and fixeff data frames
      merged_data <- pheno %>% 
        left_join(fixeff, by = "cdn_id")
      
      
      merged_data$iid <- ids_eq$iid[match(merged_data$cdn_id, ids_eq$cdn_id)]
      
      merged_data <- merged_data[, c("iid", "cortisol", "BIRTH_YEAR", "Sampling_date")]
      
      write.table(merged_data, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/fenofix.txt", col.names = F, quote = F, row.names = F)

      The file should be saved as text file, with separation by space and no columns names.

      PS: if there are any NA, they sould be replaced by 9999

      24.3.1.2 Pedigree file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID
      SECOND COLUMN = Sire ID
      THIRD COLUMN = Dam ID

      The file should be saved as text file, with separation by space and no columns names.

      We used the code below to remove the commas of a .csv file to a file with sepation by spaces.

      # to replace comma for space in the .csv file with the equivalence among IDs
      sed -i 's/,/ /g' bruno_ids.csv

      24.3.1.3 Genotype file

      First we are going to generate a Genotype file with the whole sample (252 individuals) that we are going to use for the Variance Components Estimation.

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID SECOND COLUMN = Genotypes (0, 1 and 2 format)

      The file should be saved as text file, with separation by space and no columns names.

      We used the code below to replace the cid for iid. First we merge using the second column of the firs file, and the first column of the second file. Then we use again the command awk to keep only the third and fifth columsn and sabe in a different object.

      # Using the awk function to merge the two files and the second awk to select only the 3rd and 5fh columns
      awk 'FNR==NR {a[$2]=$0; next} {print a[$1], $0}' bruno_ids.csv bruno_gntps.txt | awk '{print$3,$5}' > bruno_gntps_iid

24.3.2 Download the executable files

Download from this website https://nce.ads.uga.edu/html/projects/programs/Linux/64bit/:
  • BlupF90+ = we will use to estimate the Variance components and GEBVs
  • renumF90 = we will use to renumerate the files
  • preGSf90 = we will use to perform the Quality control
  • postGSf90 = we will use for GWAS

24.3.3 SNP MAP

mapfile <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/DGVsnpinfo.2404.ho", header = T)

colnames(mapfile)

colnames(mapfile) <- c("SNP_ID", "CHR", "POS")

mapfile <- mapfile[,c("CHR", "POS", "SNP_ID")]

write.table(mapfile, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/snpmap.txt", col.names = T, row.names = F, quote = F)

24.3.4 Parameter file for Quality Control

But, before running the GEBV we will first perform one additional step to “CLEAN” our genotypes. Actually BLUPF90 by default perform a data cleaning with pre set parameters, but as HWE is not used by default, we will perform this additional step.

The Parameter card for this step is the parameter bellow:

/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/renum_QC.par


DATAFILE
fenofix.txt
TRAITS
2
FIELDS_PASSED TO OUTPUT

WEIGHT(S)

RESIDUAL_VARIANCE
1.0
EFFECT
3 cross numer
EFFECT
4 cross alpha
EFFECT
1 cross alpha
RANDOM
animal
FILE
bruno_ped_iid_blup.txt
FILE_POS
1 2 3 0 0
SNP_FILE
bruno_gntps_iid
PED_DEPTH
0
(CO)VARIANCES
1.0
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31
  • DATAFILE: bellow this line you need to inform the name of the file with phenotype and fixed effects. As before running BLUPF90 on server you are going to direct the terminal to the directory where all these files are placed you only need to inform the name.
  • TRAITS: below this line you need to inform which column are the phenotype date in the previous file, in this example, 2.
  • FIELDS_PASSED TO OUTPUT:
  • WEIGHT (S):
  • RESIDUAL VARIANCE: for the firs run you need to inform the value of 1.0, for the second you can pick the variance from the firs run’s output.
  • EFFECT: you will inform your first effect, in this example, Birth Year, which is in the column 3, and the word cross numer because is a number.
  • EFFECT: you should provide the next effect, in this example, sample date, as sample date has one non numeric character you should inform as cross alpha, in this example column 4.
  • EFFECT: now I’m providing my animal ID information, in this example column 1, and again cross alpha because has number and letters in the ID. I’m also informing that this effect is RANDOM, and that is my animal effect.
  • FILE: bellow this line I need to provide the pedigree file. Again, as I’m already in the directory which contain the pedigree file I only need to provide the file name.
  • FILE-POS: Here I’ll inform which columns should be considered in the pedigree file, in this situation, 1 2 3 0 0.
  • PED_DEPTH: Now we can inform the depth we want the software considers the pedigree, or if we leave 0 it will the maximum possible.
  • (CO) VARIANCES: Here you should provide the Variance/Co-variance matrix, like as for residual variance in the first run we set up to 0 in this example that we don´t have to imagine any co-variance, but if you know that exist variance among you effects you shoul set up XXX for ….
  • OPTION outcallrate: Save the call rate information on SNP markers in the file.
  • OPTION saveCleanSNPs: This option generates 4 new files. We assume snpfile as a marker file.
    • snpfile_clean = new SNP marker file.
    • snpfile_clean_XrefID = new cross-reference file.
    • snpfile_SNPs_removed = a list of removed markers.
    • snpfile_Animals_removed = a list of removed animals.
  • OPTION minfreq 0.01: Minimum allele frequency to retain the marker.
  • OPTION map_file snpmap.txt: This option will upload the SNP MAP
  • OPTION excludeCHR 30 31: This option will remove sexual chromosome that is the 30 and 31

To run any softwere from Blupf90 suit we will perfom always in this way:

  1. Go to the server you wanna run this analysis, for instance, grand
ssh grand
  1. Now go to the directory you have created to run this analysis where that set of files are placed.
cd /home/bambrozi/2_CORTISOL/GWAS/BLUPF90
  1. Make the renumF90 and BlupF90+ files executables
chmod +x renumf90
chmod +x blupf90+
  1. Run renumF90
./renumf90

When you run the code above, it will ask you the name of your parameter card, for this step is renum_QC.par.

The command above will generate couple files, among them renf90.par

We modified renf90.par in:
  • renf90_DataClean.par
  • renf90_VarCompEst.par

The parameter card to perform the Quality Control is: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/renf90_DataClean.par

It will be run using the software pre preGS90 to generate the Clean Genotype and SNP_MAP files after Quality Control.

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        23 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   1.0000    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   1.0000    
OPTION SNP_file bruno_gntps_iid
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31

The second parameter card used for Variance Components Estimation (VCE) is the following: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/renf90_VarCompEst.par

It will be run using the software pre blupf90+ to generate the VCE.

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        23 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   1.0000    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   1.0000    
OPTION SNP_file bruno_gntps_iid_clean
OPTION no_quality_control
OPTION method VCE
OPTION origID
OPTION missing 9999
OPTION se_covar_function H2_1 g_3_3_1_1/(g_3_3_1_1+r_1_1)
  • OPTION SNP_file bruno_gntps_iid_clean: we are going to inform the genotype file generated in the previous step (the Quality Control). Blup will create an file with the same name that the original genotype file, and add the sufix **_clean**
  • OPTION no_quality_control we need to set up this option because we performed Quality Control in the previous step and now we don’t need that Blupf90+ perform again. Blupf90+ by default perform quality control, so if we do not want, we need to specify.
  • OPTION method: VCE (Variance Component Estimation).
  • OPTION OrigID: this will keep the original ID informed.
  • OPTION missing 9999: you are informing that missing values will appear as 9999
  • OPTION se_covar_function: H2_1 g_3_3_1_1/(g_3_3_1_1+r_1_1)
    • H2_1: the name that your function will appear on the output files.
    • g_3_3: you are asking for genetic variance estimation for the 3rd informed effect.
    • **_1_1**: this effect is in the 1st column.
    • /(g_3_3_1_1+r_1_1): to get the total phenotipic variance, you are summing to genetic variance the residual variance of the effect in column 1.

In the parameter card above, we remove the option for Quality Control and added options for Variance Components Estimation, for Missing data, for origID and for heritability estimation, but the MOST IMPORTANT PART is we need to change the OPTION SNP_FILE, replacing the original genotype file, for the “clean” version generated in the previous step.

The Variance Components will be placed in the file: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/blupf90.log

blupf90.log
My Image

Now you should update you renf90_VarCompEst.par file with these informations from the .log file

Copy Residual Variance from blupf90.log and will paste on renf90_VarCompEst.par RANDOM_RESIDUAL_VALUES Copy Genetic variance for effect x from blupf90.log and will paste on renf90_VarCompEst.par (CO) VARIANCE

If the Residual Variance and Genetic variance for effect x didn’t change in your blupf90.log the analysis ended, but if this value vary, you should update again the renf90.par and run again blupf90+ until this values don’t change more.

blupf90.log
My Image

Parameter card
My Image

Now that we have the Variance components we go for the next step:
  • Prediction of SNP marker effects based on the GEBV
  • GWAS for High and Low cortisol animals
To do this, I’ll:
  • generate the Phenotype_FixEff file like bellow
  • generate the genotype file like bellow
  • Perform the QUALITY CONTROL for the new (71 samples) genotype file
  • Add the VCE from the 252 data set in the parameter card
  • Generate the GEBV
  • Run GWAS

24.4 Prediction of SNP marker effects based on the GEBV

24.4.1 GWAS for High and Low cortisol animals

Now we’ll run a new analysis using the Variance Components Estimation from the previous step to perform the GWAS.

To perform this first we need a Phenotype file, and a Genotype file only with the 71 animals (High=34 and Low=37)

24.4.1.1 Updating the files

24.4.1.1.1 Phenotype and Fixed Effects files

I used the code bellow to remove individuals with MEDIUM cortisol Phenotype

phenofix <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/fenofix.txt")


# Now we are gona remove the intermediary animals from pheno object
phenofix$Categorical <- ifelse(phenofix$V2 >= 956, "H", 
                            ifelse(phenofix$V2 <= 190.8, "L", "M"))
table(phenofix$Categorical)

phenofix <- phenofix[phenofix$Categorical != "M", ]

phenofix <- phenofix[, c("V1", "V2", "V3", "V4")]

write.table(phenofix, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/fenofix.txt", col.names = F, row.names = F, quote = F)
24.4.1.1.2 Genotype files

I used this command line bellow to remove the individuals with MEDIUM phenotypes.

awk 'NR==FNR{ids[$1]; next} $1 in ids' fenofix.txt bruno_gntps_iid > bruno_gntps_iid_71

24.4.1.2 Running renum_QC.par

/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/renum_QC.par

DATAFILE
fenofix.txt
TRAITS
2
FIELDS_PASSED TO OUTPUT

WEIGHT(S)

RESIDUAL_VARIANCE
77182
EFFECT
3 cross numer
EFFECT
4 cross alpha
EFFECT
1 cross alpha
RANDOM
animal
FILE
bruno_ped_iid_blup.txt
FILE_POS
1 2 3 0 0
SNP_FILE
bruno_gntps_iid_71
PED_DEPTH
0
(CO)VARIANCES
28212
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31
We modified renf90.par in e copies:
  • renf90_DataClean.par
  • renf90_ssGWAS1_Ginv.par
  • renf90_ssGWAS2_SNPeff.par

24.4.1.3 Running renf90_DataClean.par for Quality Control

To run the parameter card bellow we are going to use the software presGSf90 /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/renf90_DataClean.par

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        22 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   77182.    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   28212.    
OPTION SNP_file bruno_gntps_iid_71
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31

24.4.1.4 Running renf90_ssGWAS1_Ginv.par for Ginv estimation

May be necessary to run the command bellow on the server Setting the stack size to “unlimited” allows the program to allocate memory for these large structures without hitting stack limits. By removing stack size limits, BLUPF90 is less likely to encounter segmentation faults or memory allocation issues that arise when the stack space is insufficient for the computations needed.

ulimit -s unlimited

The parameter card bellow we are going to run using the software blupf90+:

/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/renf90_ssGWAS1_Ginv.par

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        22 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   77182.    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   28212.    
OPTION SNP_file bruno_gntps_iid_71_clean
OPTION no_quality_control
OPTION origID
OPTION missing 9999
OPTION saveGInverse
OPTION saveA22Inverse
OPTION snp_p_value

24.4.1.5 Running renf90_ssGWAS2_SNPeff.par for GWAS

The parameter card bellow we are going to run using the software postGSf90:

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        22 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   77182.    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   28212.    
OPTION SNP_file bruno_gntps_iid_71_clean
OPTION origID
OPTION no_quality_control
OPTION readGInverse
OPTION readA22Inverse
OPTION map_file snpmap.txt_clean
OPTION snp_p_value
OPTION Manhattan_plot_R
OPTION Manhattan_plot 
This code will generate several files, among them:
  • chrsnp_pval:
    • Column 1: trait
    • Column 2: effect
    • Column 3: -log10(p-value)
    • Column 4: SNP
    • Column 5: Chromosome
    • Column 6: Position in bp
  • Pft1e3.R: a r-code to generate the Manhattan plot in R using the chrsnp_pval

24.4.1.6 Manhattan Plots for BLUPF90 GWAS

24.4.1.6.1 Genome Independent Segment

To make the Manhattan Plot considering Genome Independent Segment we should run the code bellow. This code has part of the code in the file Pft1e3.R

Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")

library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
  filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
  summarise(total_length = sum(`Seq length`)) %>%
  pull(total_length)

# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8

# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)

NeL <- Ne*L_M

# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)


# Calculate Bonferroni threshold (already done)
bonf <- -log10(0.05 / Me)


setwd("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO")
# Read in and process data for Manhattan plot
yyy1 <- read.table("chrsnp_pval")
yyy <- yyy1[order(yyy1$V4), ]
zzz <- yyy[which(yyy$V1 == 1 & yyy$V2 == 3), ]
n <- nrow(zzz)
y <- zzz[, 4]
x <- zzz[, 3]
chr1 <- zzz[, 5]
chr <- NULL
pos <- NULL

for (i in unique(yyy$V5)) {
  zz <- yyy[yyy$V5 == i, ]
  key <- zz$V4
  medio <- round(nrow(zz) / 2, 0)
  z <- key[medio]
  pos <- c(pos, z)
}

chrn <- unique(yyy$V5)
one <- which(chr1 %% 4 == 0)
two <- which(chr1 %% 4 == 1)
three <- which(chr1 %% 4 == 2)
four <- which(chr1 %% 4 == 3)
chr[one] <- "darkgoldenrod"
chr[two] <- "darkorchid"
chr[three] <- "blue"
chr[four] <- "forestgreen"

# Create Manhattan plot with Bonferroni line and legend
pdf(file = "Pft1e3_manplot_with_bonf_ind_seg.pdf", family = "sans", height = 27.8, width = 50, pointsize = 20, bg = "white")
par(mfrow = c(1, 1), family = "sans", cex = 1.5, font = 2)
plot(y, x, xaxt = "n", main = "Manhattan Plot SNP p_value - Trait: 1 Effect: 3", xlab = "", ylab = "-log10(p-value)", pch = 20, xlim = c(1, n), ylim = c(0, max(x)), col = chr)

# Add Bonferroni line
abline(h = bonf, col = "red", lwd = 2, lty = 2)

# Add legend for Bonferroni line
legend("topright", legend = "Bonferroni correction for genome independent segments", col = "red", lwd = 2, lty = 2, cex = 1)

axis(1, at = pos, labels = chrn)
dev.off()

My Image

For additional analysis like Variant Effect Prediction (VEP) we need the rsID, to get the rsID we use the software SNPChimp which requires SNP_names, but the BLUPF90 output have only the Chromosome and Position of the SNPs, so we are going to perfome these two steps to get one file with t he significant SNPs + SNP_name + rsID

24.4.1.6.1.1 Step 01 = Bring the SNP name to GWAS output

For this analysis we have to build this new sheet bringing SNP ID from snpmap.txt

gwas = read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/chrsnp_pval")
colnames(gwas) <- c("V1", "V2", "LOG_P", "SNP", "CHR", "BP")
gwas <- filter(gwas, LOG_P >= bonf)

snpmap <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/snpmap.txt", header = T)

# Filter snpmap to only include rows that match the CHR and BP values in out_genes
filtered_snpmap <- snpmap[snpmap$CHR %in% gwas$CHR & snpmap$POS %in% gwas$BP, ]

# Merge the filtered snpmap with out_genes
gwas_snpname <- merge(gwas, filtered_snpmap[, c("CHR", "POS", "SNP_ID")], 
                   by.x = c("CHR", "BP"), by.y = c("CHR", "POS"), 
                   all.x = TRUE)

gwas_snpname <- gwas_snpname[,c("CHR", "BP", "LOG_P", "SNP_ID")]

write.csv(gwas_snpname, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/chrsnp_pval_SNPid_ind_seg_sig_BLUPF90.csv")
X CHR BP LOG_P SNP_ID rsID X.1 rs SNP_name chromosome position
1 11 19779915 4.828190 Hapmap55558-rs29013980 NA NA rs110160157 ARS-BFGL-NGS-12139 0 0
2 12 17477322 4.417576 BTB-00488482 NA NA rs42751504 ARS-BFGL-NGS-37809 0 0
3 14 15929822 5.467439 ARS-BFGL-NGS-82859 NA NA rs43691687 BTB-00488482 0 0
4 15 33066384 5.041271 BTB-00594449 NA NA rs42305073 BTB-01148543 0 0
5 15 57930281 4.514377 ARS-BFGL-NGS-30515 NA NA rs29013980 Hapmap55558-rs29013980 11 19803705
6 17 46487068 4.547766 ARS-BFGL-NGS-12510 NA NA rs43691687 BTB-00488482 12 16563156
7 17 47884497 6.622354 ARS-BFGL-NGS-87412 NA NA rs110506037 ARS-BFGL-NGS-82859 14 15419973
8 17 48800954 5.268940 ARS-BFGL-NGS-112149 NA NA rs41764450 BTB-00594449 15 31708030
9 2 118820218 4.420962 Hapmap53065-rs29026778 NA NA rs41764450 BTB-00594449 15 33601229
10 2 41602429 4.497977 BTB-00096979 NA NA rs110565206 ARS-BFGL-NGS-30515 15 57514944
11 2 41670655 4.377284 Hapmap48777-BTA-47434 NA NA rs110565206 ARS-BFGL-NGS-30515 15 58709858
12 20 60365668 4.603817 BTB-01341053 NA NA rs110038841 ARS-BFGL-NGS-12510 17 47484553
13 20 65351653 4.891206 ARS-BFGL-NGS-91119 NA NA rs109273103 ARS-BFGL-NGS-87412 17 48889044
14 21 4055731 4.493938 ARS-BFGL-NGS-112210 NA NA rs110038841 ARS-BFGL-NGS-12510 17 48998486
15 23 44822126 4.316434 ARS-BFGL-NGS-115605 NA NA rs109276211 ARS-BFGL-NGS-112149 17 49828442
16 24 857728 4.458018 Hapmap47669-BTA-59022 NA NA rs109273103 ARS-BFGL-NGS-87412 17 50583981
17 28 20016672 5.130557 ARS-BFGL-NGS-116552 NA NA rs109276211 ARS-BFGL-NGS-112149 17 51512244
18 28 35807366 4.695875 ARS-BFGL-NGS-71077 NA NA rs43305418 BTB-00096979 2 41717655
19 29 33039863 4.933140 ARS-BFGL-NGS-41631 NA NA rs41636137 Hapmap48777-BTA-47434 2 41786376
20 3 110866523 4.815943 ARS-BFGL-NGS-118207 NA NA rs43305418 BTB-00096979 2 43072293
21 3 111526170 4.760351 ARS-BFGL-NGS-74948 NA NA rs41636137 Hapmap48777-BTA-47434 2 43141034
22 3 111730561 4.390707 BTB-01641394 NA NA rs29026778 Hapmap53065-rs29026778 2 119630462
23 3 111751663 4.390707 ARS-BFGL-NGS-37809 NA NA rs29026778 Hapmap53065-rs29026778 2 122995670
24 3 111772736 5.088738 ARS-BFGL-NGS-25298 NA NA rs42462935 BTB-01341053 20 60447656
25 4 103979600 5.591006 ARS-BFGL-NGS-110705 NA NA rs42462935 BTB-01341053 20 64066021
26 4 107787202 5.375628 ARS-BFGL-NGS-45265 NA NA rs109575643 ARS-BFGL-NGS-91119 20 65467968
27 4 109568557 4.709882 Hapmap48062-BTA-72409 NA NA rs109575643 ARS-BFGL-NGS-91119 20 69096095
28 4 110053134 5.245598 UA-IFASA-2147 NA NA rs109631116 ARS-BFGL-NGS-112210 21 2685189
29 4 24239851 4.641241 Hapmap60681-rs29013301 NA NA rs109631116 ARS-BFGL-NGS-112210 21 4193189
30 4 25933587 4.352451 Hapmap50554-BTA-107048 NA NA rs42029843 ARS-BFGL-NGS-115605 23 44680105
31 4 26511448 4.537662 Hapmap59011-rs29027498 NA NA rs42029843 ARS-BFGL-NGS-115605 23 45514921
32 4 27021431 5.045447 Hapmap59743-rs29017061 NA NA rs41645754 Hapmap47669-BTA-59022 24 595659
33 4 27094376 5.485063 BTB-00170785 NA NA rs41645754 Hapmap47669-BTA-59022 24 1094942
34 4 50755462 5.015905 ARS-BFGL-NGS-12139 NA NA rs110428837 ARS-BFGL-NGS-116552 28 18933788
35 4 7873471 4.610767 Hapmap60503-rs29018741 NA NA rs110428837 ARS-BFGL-NGS-116552 28 20096355
36 4 95650788 4.825103 Hapmap58854-rs29023486 NA NA rs109584097 ARS-BFGL-NGS-71077 28 35297722
37 6 36184467 4.758110 Hapmap23854-BTC-062412 NA NA rs109584097 ARS-BFGL-NGS-71077 28 36010482
38 7 110306791 4.609691 BTB-01148543 NA NA rs110121846 ARS-BFGL-NGS-41631 29 33593218
NA NA NA NA NA NA rs110121846 ARS-BFGL-NGS-41631 29 34563419
NA NA NA NA NA NA rs110081798 ARS-BFGL-NGS-118207 3 111458159
NA NA NA NA NA NA rs41585925 ARS-BFGL-NGS-74948 3 112135228
NA NA NA NA NA NA rs42752353 BTB-01641394 3 112340357
NA NA NA NA NA NA rs42751504 ARS-BFGL-NGS-37809 3 112361478
NA NA NA NA NA NA rs109868537 ARS-BFGL-NGS-25298 3 112382551
NA NA NA NA NA NA rs110081798 ARS-BFGL-NGS-118207 3 118126544
NA NA NA NA NA NA rs41585925 ARS-BFGL-NGS-74948 3 118803985
NA NA NA NA NA NA rs42752353 BTB-01641394 3 119008234
NA NA NA NA NA NA rs42751504 ARS-BFGL-NGS-37809 3 119029352
NA NA NA NA NA NA rs29018741 Hapmap60503-rs29018741 4 7779209
NA NA NA NA NA NA rs29018741 Hapmap60503-rs29018741 4 8585313
NA NA NA NA NA NA rs29013301 Hapmap60681-rs29013301 4 24183919
NA NA NA NA NA NA rs29013301 Hapmap60681-rs29013301 4 25355321
NA NA NA NA NA NA rs41615935 Hapmap50554-BTA-107048 4 25913130
NA NA NA NA NA NA rs29027498 Hapmap59011-rs29027498 4 26490406
NA NA NA NA NA NA rs29017061 Hapmap59743-rs29017061 4 26995149
NA NA NA NA NA NA rs43377276 BTB-00170785 4 27067897
NA NA NA NA NA NA rs41615935 Hapmap50554-BTA-107048 4 27162987
NA NA NA NA NA NA rs29027498 Hapmap59011-rs29027498 4 27732337
NA NA NA NA NA NA rs29017061 Hapmap59743-rs29017061 4 28239398
NA NA NA NA NA NA rs43377276 BTB-00170785 4 28312330
NA NA NA NA NA NA rs110160157 ARS-BFGL-NGS-12139 4 51047434
NA NA NA NA NA NA rs110160157 ARS-BFGL-NGS-12139 4 52798744
NA NA NA NA NA NA rs29023486 Hapmap58854-rs29023486 4 96436124
NA NA NA NA NA NA rs29023486 Hapmap58854-rs29023486 4 98848662
NA NA NA NA NA NA rs110079750 ARS-BFGL-NGS-110705 4 104816971
NA NA NA NA NA NA rs110079750 ARS-BFGL-NGS-110705 4 107675833
NA NA NA NA NA NA rs110935391 ARS-BFGL-NGS-45265 4 108608248
NA NA NA NA NA NA rs41566051 Hapmap48062-BTA-72409 4 110354858
NA NA NA NA NA NA rs29012492 UA-IFASA-2147 4 110839255
NA NA NA NA NA NA rs110935391 ARS-BFGL-NGS-45265 4 111954676
NA NA NA NA NA NA rs41566051 Hapmap48062-BTA-72409 4 113822987
NA NA NA NA NA NA rs29012492 UA-IFASA-2147 4 114307093
NA NA NA NA NA NA rs81154019 Hapmap23854-BTC-062412 6 37610880
24.4.1.6.1.2 Step 02 = Bring the rsID to the file with SNP name.

After get the rsID from SNPCHIMP I produced this table with the code bellow:

rsid <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/SNPchimp_result_1723415732_BLUPF90GWAS.tsv", header = T)

merged <- merge(rsid, gwas_snpname, by.x ="SNP_name", by.y ="SNP_ID")

colnames(merged)

merged <- merged[,c("SNP_name", "rs", "CHR", "BP", "LOG_P")]

colnames(merged) <- c("SNP_name", "rsID", "CHR", "BP", "LOG_P")

write.csv(merged, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/gwas_ind_seg_sig_SNPname_rsID.csv")
X SNP_name rsID CHR BP LOG_P
1 ARS-BFGL-NGS-110705 rs110079750 4 103979600 5.591006
2 ARS-BFGL-NGS-112149 rs109276211 17 48800954 5.268940
3 ARS-BFGL-NGS-112210 rs109631116 21 4055731 4.493938
4 ARS-BFGL-NGS-115605 rs42029843 23 44822126 4.316434
5 ARS-BFGL-NGS-116552 rs110428837 28 20016672 5.130557
6 ARS-BFGL-NGS-118207 rs110081798 3 110866523 4.815943
7 ARS-BFGL-NGS-12139 rs110160157 4 50755462 5.015905
8 ARS-BFGL-NGS-12510 rs110038841 17 46487068 4.547766
9 ARS-BFGL-NGS-25298 rs109868537 3 111772736 5.088738
10 ARS-BFGL-NGS-30515 rs110565206 15 57930281 4.514377
11 ARS-BFGL-NGS-37809 rs42751504 3 111751663 4.390707
12 ARS-BFGL-NGS-41631 rs110121846 29 33039863 4.933140
13 ARS-BFGL-NGS-45265 rs110935391 4 107787202 5.375628
14 ARS-BFGL-NGS-71077 rs109584097 28 35807366 4.695875
15 ARS-BFGL-NGS-74948 rs41585925 3 111526170 4.760351
16 ARS-BFGL-NGS-82859 rs110506037 14 15929822 5.467439
17 ARS-BFGL-NGS-87412 rs109273103 17 47884497 6.622354
18 ARS-BFGL-NGS-91119 rs109575643 20 65351653 4.891206
19 BTB-00096979 rs43305418 2 41602429 4.497977
20 BTB-00170785 rs43377276 4 27094376 5.485063
21 BTB-00488482 rs43691687 12 17477322 4.417576
22 BTB-00594449 rs41764450 15 33066384 5.041271
23 BTB-01148543 rs42305073 7 110306791 4.609691
24 BTB-01341053 rs42462935 20 60365668 4.603817
25 BTB-01641394 rs42752353 3 111730561 4.390707
26 Hapmap23854-BTC-062412 rs81154019 6 36184467 4.758110
27 Hapmap47669-BTA-59022 rs41645754 24 857728 4.458018
28 Hapmap48062-BTA-72409 rs41566051 4 109568557 4.709882
29 Hapmap48777-BTA-47434 rs41636137 2 41670655 4.377284
30 Hapmap50554-BTA-107048 rs41615935 4 25933587 4.352451
31 Hapmap53065-rs29026778 rs29026778 2 118820218 4.420962
32 Hapmap55558-rs29013980 rs29013980 11 19779915 4.828190
33 Hapmap58854-rs29023486 rs29023486 4 95650788 4.825103
34 Hapmap59011-rs29027498 rs29027498 4 26511448 4.537662
35 Hapmap59743-rs29017061 rs29017061 4 27021431 5.045447
36 Hapmap60503-rs29018741 rs29018741 4 7873471 4.610767
37 Hapmap60681-rs29013301 rs29013301 4 24239851 4.641241
38 UA-IFASA-2147 rs29012492 4 110053134 5.245598

24.4.1.7 WINDOWS

24.4.1.8 Running renf90_ssGWAS2_SNPeff_w.par for GWAS (WINDOWS)

The parameter card bellow we are going to run using the software postGSf90:

renf90_ssGWAS2_SNPeff_W_10.par

# BLUPF90 parameter file created by RENUMF90
DATAFILE
renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        22 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   77182.    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   28212.    
OPTION SNP_file bruno_gntps_iid_71_clean
OPTION origID
OPTION no_quality_control
OPTION readGInverse
OPTION readA22Inverse
OPTION map_file snpmap.txt_clean
OPTION snp_p_value
OPTION Manhattan_plot_R
OPTION Manhattan_plot
OPTION SNP_moving_average 10
OPTION windows_variance 10 
The parameter file above will generate the following files:
  • snp_sol
    • column 1: trait
    • column 2: effect
    • column 3: SNP
    • column 4: Chromosome
    • column 5: Position
    • column 6: SNP solution
    • column 7: weight
    • column 8: variance explained by n adjacents SNP (if OPTION windows_variance is used).
    • column 9: variance of the SNP solution (used to compute the p-value) (if OPTION snp_p_value is used)
  • snp_pred
    • contains allele frequencies + SNP effects
  • chrsnpvar
    • column 1: trait
    • column 2: effect
    • column 3: variance explained by n adjacents SNP
    • column 4: SNP
    • column 5: Chromosome
    • column 6: Position
  • chrsnp_pval
    • column 1: trait
    • column 2: effect
    • column 3: -log10(p-value)
    • column 4: SNP
    • column 5: Chromosome
    • column 6: Position in bp
  • chrsnp
    • column 1: trait
    • column 2: effect
    • column 3: values of SNP effects to use in Manhattan plots → [abs(SNP_i)/SD(SNP)]
    • column 4: SNP
    • column 5: Chromosome
    • column 6: Position
  • windows_segments
    • column 1: label
    • column 2: window size (number of SNP)
    • column 3: Start SNP number for the window
    • column 4: End SNP number for the window
    • column 5: identification of window: (ChrNumber)’_’(startPositionMBP)
    • column 6: Start (ChrNumber)’_’(Position) for the window
    • column 7: End (ChrNumber)’_’(Position) for the window
  • windows_variance
    • column 1: trait
    • column 2: effect
    • column 3: Start SNP number or SNP name for the window
    • column 4: End SNP number or SNP name for the window
    • column 5: window size (number of SNP)
    • column 6: Start (ChrNumber)’_’(Position) for the window
    • column 7: End (ChrNumber)’_’(Position) for the window
    • column 8: identification of window: (ChrNumber)’_’(startPositionMBP)
    • column 9: variance explained by n adjacents SNP
  • Vft1e3.R
  • Sft1e3.R
  • Pft1e3.R

24.5 Match Significant p-value and Windows variance bigger than 0.5%

#Only significant SNPs after Genome Independent Segments adjust
sigPvalue <- read.csv("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/chrsnp_pval_SNPid_ind_seg_sig_BLUPF90.csv")
sigPvalue <- sigPvalue[, c("CHR", "BP", "LOG_P", "SNP_ID")]

w_var <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/chrsnpvar", header = F)
w_var <- filter(w_var, V3 > 0.5)
colnames(w_var) <- c("V1", "V2", "Var", "SNP", "CHR", "BP")

snp_map <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/snpmap.txt_clean", header = T)

# Fazer o merge baseado em duas condições: CHR e POS
merged_data <- merge(w_var, snp_map, by.x = c("CHR", "BP"), by.y = c("CHR", "POS"), all.x = TRUE)
w_var <- merged_data[,c("CHR", "BP", "Var", "SNP_ID")]

# Get indices in w_var where SNP_ID matches those in sigPvalue
indices <- which(w_var$SNP_ID %in% sigPvalue$SNP_ID)

# Optionally extract the rows from w_var based on these indices
matching_rows <- w_var[indices, ]

write.csv(matching_rows, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/pvalue_windowsVar_match.csv")
write.csv(w_var, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/windowsVar_min05.csv")

sum(matching_rows$Var)

The 3 SNPs bellow were significant associated to cortisol considering the p-value adjusted for genome independent segments and also individually explain more than 0.5% of the variance.

X CHR BP Var SNP_ID
36 3 111730561 0.9480214 BTB-01641394
37 3 111751663 1.0607739 ARS-BFGL-NGS-37809
38 3 111772736 1.2101267 ARS-BFGL-NGS-25298

These 3 SNPs explain together 3,22% of Variance and all of them fall in the gene CSMD2

Next we need to check the allele frequency of this SNPs.

24.6 BLUPF90+ GALLO

Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")

library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
  filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
  summarise(total_length = sum(`Seq length`)) %>%
  pull(total_length)

# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8

# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)

NeL <- Ne*L_M

# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)


# Calculate Bonferroni threshold (already done)
bonf <- -log10(0.05 / Me)


# GALLO

#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")

#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")

#import MARKER files = the GWAS output
gwas = read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/chrsnp_pval")
colnames(gwas) <- c("V1", "V2", "LOG_P", "SNP", "CHR", "BP")
gwas <- filter(gwas, LOG_P >= bonf)


#FINDING GENES AND QTLs ARROUND THE MARKER

#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2, 
                                            marker_file= gwas, 
                                            method = "gene",
                                            marker = "snp", 
                                            interval = 50000, 
                                            nThreads = NULL)

write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_genes_50k.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_genes_50k.csv")

#FINDING QTLs

out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2, 
                                          marker_file= gwas, 
                                          method = "qtl",
                                          marker = "snp", 
                                          interval = 50000, 
                                          nThreads = NULL)


write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_qtl_50k.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_qtl_50k_clean.csv")

The GALLO output are bellow:

For GENES

X V1 V2 LOG_P SNP CHR BP chr start_pos end_pos width strand gene_id gene_name gene_biotype
1 1 3 4.420962 4632 2 118820218 2 118803760 118824701 20942 + ENSBTAG00000030718 SPATA3 protein_coding
2 1 3 4.420962 4632 2 118820218 2 118831062 118836777 5716 + ENSBTAG00000054626 NA lncRNA
3 1 3 4.420962 4632 2 118820218 2 118846915 118854132 7218 + ENSBTAG00000026710 C2H2orf72 protein_coding
4 1 3 4.420962 4632 2 118820218 2 118865174 118946608 81435 + ENSBTAG00000005119 PSMD1 protein_coding
5 1 3 4.377284 3498 2 41670655 2 41676135 42259624 583490 - ENSBTAG00000005562 GALNT13 protein_coding
6 1 3 4.815943 6862 3 110866523 3 110769753 110827938 58186 + ENSBTAG00000013867 DLGAP3 protein_coding
7 1 3 4.815943 6862 3 110866523 3 110835276 110839536 4261 + ENSBTAG00000014235 SMIM12 protein_coding
8 1 3 4.815943 6862 3 110866523 3 110888198 110890904 2707 - ENSBTAG00000013881 GJA4 protein_coding
9 1 3 4.815943 6862 3 110866523 3 110900183 110905849 5667 - ENSBTAG00000012584 GJB3 protein_coding
10 1 3 4.760351 6873 3 111526170 3 111552563 111568286 15724 - ENSBTAG00000020798 C3H1orf94 protein_coding
11 1 3 4.390707 6880 3 111730561 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
12 1 3 4.390707 6881 3 111751663 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
13 1 3 5.088738 6882 3 111772736 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
14 1 3 4.537662 7515 4 26511448 4 26324012 26469689 145678 - ENSBTAG00000014074 SNX13 protein_coding
15 1 3 5.485063 7526 4 27094376 4 27093041 27667961 574921 + ENSBTAG00000003808 HDAC9 protein_coding
16 1 3 4.825103 8704 4 95650788 4 95631850 95631921 72 - ENSBTAG00000045386 bta-mir-320b miRNA
17 1 3 5.591006 8831 4 103979600 4 103877533 103947944 70412 - ENSBTAG00000004799 DENND2A protein_coding
18 1 3 5.591006 8831 4 103979600 4 103994401 104008963 14563 + ENSBTAG00000020484 ADCK2 protein_coding
19 1 3 5.591006 8831 4 103979600 4 104013524 104019157 5634 + ENSBTAG00000021759 NDUFB2 protein_coding
20 1 3 5.591006 8831 4 103979600 4 104027627 104194547 166921 - ENSBTAG00000021761 BRAF protein_coding
21 1 3 5.375628 8888 4 107787202 4 107701708 108020989 319282 + ENSBTAG00000054424 NA protein_coding
22 1 3 5.375628 8888 4 107787202 4 107718449 108101163 382715 - ENSBTAG00000017676 TPK1 protein_coding
23 1 3 4.709882 8904 4 109568557 4 109526182 109526350 169 - ENSBTAG00000053666 NA misc_RNA
24 1 3 4.610767 7184 4 7873471 4 7867490 7867588 99 - ENSBTAG00000045389 U6 snRNA
25 1 3 5.015905 7910 4 50755462 4 50743789 50957593 213805 - ENSBTAG00000006589 CFTR protein_coding
26 1 3 4.758110 11511 6 36184467 6 36052150 36198478 146329 - ENSBTAG00000010120 HERC3 protein_coding
27 1 3 4.609691 14772 7 110306791 7 110296879 110565532 268654 + ENSBTAG00000035662 CAMK4 protein_coding
28 1 3 4.828190 20558 11 19779915 11 19680235 19765027 84793 - ENSBTAG00000001114 PRKD3 protein_coding
29 1 3 4.828190 20558 11 19779915 11 19792170 19822758 30589 + ENSBTAG00000013923 QPCT protein_coding
30 1 3 5.467439 25167 14 15929822 14 15865228 15995482 130255 - ENSBTAG00000013537 FER1L6 protein_coding
31 1 3 5.467439 25167 14 15929822 14 15914441 15917042 2602 + ENSBTAG00000048373 NA protein_coding
32 1 3 5.041271 26782 15 33066384 15 32820702 33050885 230184 - ENSBTAG00000054820 NA lncRNA
33 1 3 4.547766 29742 17 46487068 17 46406767 46715519 308753 + ENSBTAG00000009797 RIMBP2 protein_coding
34 1 3 6.622354 29772 17 47884497 17 47426664 48084896 658233 + ENSBTAG00000002950 TMEM132D protein_coding
35 1 3 5.268940 29784 17 48800954 17 48417339 48868560 451222 - ENSBTAG00000046256 TMEM132C protein_coding
36 1 3 4.891206 33665 20 65351653 20 65311530 65343117 31588 - ENSBTAG00000009401 MTRR protein_coding
37 1 3 4.891206 33665 20 65351653 20 65343172 65354775 11604 + ENSBTAG00000009400 FASTKD3 protein_coding
38 1 3 4.891206 33665 20 65351653 20 65368557 65384042 15486 + ENSBTAG00000055034 CFAP90 protein_coding
39 1 3 4.891206 33665 20 65351653 20 65388705 65842986 454282 - ENSBTAG00000019210 ADCY2 protein_coding
40 1 3 4.493938 33836 21 4055731 21 3866347 4146441 280095 - ENSBTAG00000013422 GABRB3 protein_coding
41 1 3 4.316434 36688 23 44822126 23 44802116 44835671 33556 - ENSBTAG00000010256 TMEM170B protein_coding
42 1 3 4.458018 36876 24 857728 24 779627 866891 87265 - ENSBTAG00000000656 NFATC1 protein_coding
43 1 3 4.458018 36876 24 857728 24 879074 1029386 150313 - ENSBTAG00000001224 ATP9B protein_coding
44 1 3 4.695875 41057 28 35807366 28 35805434 35823873 18440 + ENSBTAG00000021416 PRXL2A protein_coding
45 1 3 4.695875 41057 28 35807366 28 35848163 35904370 56208 + ENSBTAG00000003907 TSPAN14 protein_coding
46 1 3 4.933140 41785 29 33039863 29 32993975 33024632 30658 - ENSBTAG00000003176 JAM3 protein_coding
47 1 3 4.933140 41785 29 33039863 29 33036823 33044208 7386 - ENSBTAG00000052082 NA lncRNA

FOR QTLs

X CHR SNP BP QTL_type start_pos end_pos QTL_ID
1 2 3497 41602429 Health 41584083 41584087 32371
2 2 3497 41602429 Milk 41646649 41646653 113699
3 2 3498 41670655 Milk 41646649 41646653 113699
4 2 3498 41670655 Milk 41668170 41668174 113700
5 2 3498 41670655 Meat_and_Carcass 41676143 41676147 154062
6 2 3498 41670655 Reproduction 41693862 41693866 29899
7 2 4632 118820218 Milk 118779183 118779187 200902
8 2 4632 118820218 Milk 118779985 118779989 202500
9 2 4632 118820218 Milk 118781186 118781190 202905
10 2 4632 118820218 Milk 118785252 118785256 201638
11 2 4632 118820218 Milk 118789209 118789213 202885
12 2 4632 118820218 Reproduction 118820216 118820220 39599
13 2 4632 118820218 Reproduction 118820216 118820220 39600
14 2 4632 118820218 Reproduction 118820216 118820220 39601
15 2 4632 118820218 Exterior 118820216 118820220 39602
16 2 4632 118820218 Exterior 118820216 118820220 39603
17 2 4632 118820218 Milk 118820216 118820220 39604
18 2 4632 118820218 Production 118820216 118820220 39605
19 2 4632 118820218 Exterior 118820216 118820220 39606
20 2 4632 118820218 Milk 118820216 118820220 39607
21 2 4632 118820218 Production 118820216 118820220 39608
22 2 4632 118820218 Production 118820216 118820220 39609
23 2 4632 118820218 Exterior 118820216 118820220 39610
24 2 4632 118820218 Exterior 118820216 118820220 39611
25 2 4632 118820218 Production 118820216 118820220 39612
26 2 4632 118820218 Reproduction 118820216 118820220 39613
27 2 4632 118820218 Health 118820216 118820220 39614
28 2 4632 118820218 Exterior 118820216 118820220 39615
29 2 4632 118820218 Exterior 118820216 118820220 39616
30 2 4632 118820218 Reproduction 118820216 118820220 125235
31 3 6862 110866523 Meat_and_Carcass 110866521 110866525 152332
32 3 6873 111526170 Health 111487002 111487006 137202
33 3 6873 111526170 Health 111487002 111487006 170274
34 3 6873 111526170 Health 111509252 111509256 170275
35 3 6873 111526170 Health 111515878 111515882 170276
36 3 6880 111730561 Reproduction 111708234 111708238 30008
37 3 6881 111751663 Reproduction 111708234 111708238 30008
38 3 6880 111730561 Reproduction 111708234 111708238 30244
39 3 6881 111751663 Reproduction 111708234 111708238 30244
40 3 6880 111730561 Meat_and_Carcass 111708234 111708238 152258
41 3 6881 111751663 Meat_and_Carcass 111708234 111708238 152258
42 3 6880 111730561 Meat_and_Carcass 111717521 111717525 225527
43 3 6881 111751663 Meat_and_Carcass 111717521 111717525 225527
44 4 7184 7873471 Milk 7839854 7839858 118382
45 4 7184 7873471 Milk 7851689 7851693 118383
46 4 7184 7873471 Milk 7878682 7878686 113676
47 4 7184 7873471 Milk 7878682 7878686 118072
48 4 7471 24239851 Meat_and_Carcass 24248260 24248264 228128
49 4 7471 24239851 Reproduction 24279008 24279012 212516
50 4 7471 24239851 Meat_and_Carcass 24288010 24288014 107768
51 4 7471 24239851 Meat_and_Carcass 24288010 24288014 107796
52 4 7515 26511448 Exterior 26511446 26511450 66124
53 4 7524 27021431 Production 27014514 27014518 65173
54 4 7910 50755462 Meat_and_Carcass 50781007 50781011 228112
55 4 8831 103979600 Health 103949847 103949851 96218
56 4 8831 103979600 Meat_and_Carcass 104013536 104013540 231600
57 4 8888 107787202 Reproduction 107749162 107749166 106782
58 6 11511 36184467 Milk 36141496 36141500 244115
59 6 11511 36184467 Milk 36141496 36141500 246435
60 6 11511 36184467 Milk 36141496 36141500 250700
61 6 11511 36184467 Production 36143407 36143411 190189
62 6 11511 36184467 Production 36143929 36143933 187391
63 6 11511 36184467 Production 36145023 36145027 183480
64 6 11511 36184467 Production 36145023 36145027 190272
65 6 11511 36184467 Production 36145254 36145258 182513
66 6 11511 36184467 Production 36145254 36145258 186692
67 6 11511 36184467 Production 36147416 36147420 187702
68 6 11511 36184467 Production 36147705 36147709 186912
69 6 11511 36184467 Production 36148373 36148377 182603
70 6 11511 36184467 Production 36148373 36148377 188134
71 6 11511 36184467 Production 36149039 36149043 186551
72 6 11511 36184467 Production 36150874 36150878 183765
73 6 11511 36184467 Production 36150874 36150878 186794
74 6 11511 36184467 Production 36151323 36151327 183528
75 6 11511 36184467 Production 36151323 36151327 187598
76 6 11511 36184467 Production 36153011 36153015 188352
77 6 11511 36184467 Production 36154453 36154457 188950
78 6 11511 36184467 Production 36156604 36156608 187155
79 6 11511 36184467 Production 36156892 36156896 183350
80 6 11511 36184467 Production 36156892 36156896 187196
81 6 11511 36184467 Production 36157620 36157624 23700
82 6 11511 36184467 Production 36157620 36157624 66352
83 6 11511 36184467 Production 36157620 36157624 66353
84 6 11511 36184467 Production 36157620 36157624 66354
85 6 11511 36184467 Production 36157620 36157624 66355
86 6 11511 36184467 Production 36157620 36157624 67222
87 6 11511 36184467 Production 36157620 36157624 67223
88 6 11511 36184467 Production 36157620 36157624 164068
89 6 11511 36184467 Production 36157620 36157624 164167
90 6 11511 36184467 Meat_and_Carcass 36157620 36157624 164343
91 6 11511 36184467 Production 36157620 36157624 164369
92 6 11511 36184467 Production 36157620 36157624 164488
93 6 11511 36184467 Production 36157620 36157624 183351
94 6 11511 36184467 Production 36157620 36157624 186777
95 6 11511 36184467 Production 36157986 36157990 187308
96 6 11511 36184467 Production 36158359 36158363 187062
97 6 11511 36184467 Production 36158498 36158502 186952
98 6 11511 36184467 Production 36159606 36159610 183352
99 6 11511 36184467 Production 36159606 36159610 187046
100 6 11511 36184467 Production 36160694 36160698 187096
101 6 11511 36184467 Production 36161273 36161277 187575
102 6 11511 36184467 Production 36161375 36161379 186498
103 6 11511 36184467 Production 36161625 36161629 187580
104 6 11511 36184467 Production 36161633 36161637 187885
105 6 11511 36184467 Production 36162950 36162954 187283
106 6 11511 36184467 Production 36163340 36163344 187023
107 6 11511 36184467 Production 36163428 36163432 186547
108 6 11511 36184467 Reproduction 36163939 36163943 14703
109 6 11511 36184467 Reproduction 36163939 36163943 14707
110 6 11511 36184467 Production 36163939 36163943 189833
111 6 11511 36184467 Production 36164020 36164024 189834
112 6 11511 36184467 Production 36164564 36164568 187124
113 6 11511 36184467 Production 36165001 36165005 186965
114 6 11511 36184467 Production 36165137 36165141 187629
115 6 11511 36184467 Production 36165150 36165154 188219
116 6 11511 36184467 Production 36165218 36165222 186478
117 6 11511 36184467 Production 36165305 36165309 187300
118 6 11511 36184467 Production 36165645 36165649 186485
119 6 11511 36184467 Production 36166108 36166112 187795
120 6 11511 36184467 Production 36166178 36166182 187257
121 6 11511 36184467 Production 36166205 36166209 187129
122 6 11511 36184467 Production 36166298 36166302 187368
123 6 11511 36184467 Reproduction 36166503 36166507 14704
124 6 11511 36184467 Milk 36166503 36166507 105821
125 6 11511 36184467 Production 36166503 36166507 189279
126 6 11511 36184467 Production 36166791 36166795 189278
127 6 11511 36184467 Production 36166795 36166799 182735
128 6 11511 36184467 Production 36166795 36166799 189277
129 6 11511 36184467 Production 36167325 36167329 183190
130 6 11511 36184467 Production 36167325 36167329 186569
131 6 11511 36184467 Production 36167828 36167832 183191
132 6 11511 36184467 Production 36167828 36167832 188015
133 6 11511 36184467 Production 36168124 36168128 183305
134 6 11511 36184467 Production 36168124 36168128 186440
135 6 11511 36184467 Production 36168592 36168596 182749
136 6 11511 36184467 Production 36168592 36168596 186494
137 6 11511 36184467 Production 36168720 36168724 182760
138 6 11511 36184467 Production 36168720 36168724 186560
139 6 11511 36184467 Production 36169712 36169716 187059
140 6 11511 36184467 Production 36169792 36169796 186511
141 6 11511 36184467 Milk 36170113 36170117 140218
142 6 11511 36184467 Production 36170133 36170137 186638
143 6 11511 36184467 Production 36170199 36170203 187467
144 6 11511 36184467 Production 36171684 36171688 188463
145 6 11511 36184467 Production 36172370 36172374 187237
146 6 11511 36184467 Milk 36172376 36172380 246431
147 6 11511 36184467 Production 36177199 36177203 183152
148 6 11511 36184467 Production 36177199 36177203 188011
149 6 11511 36184467 Production 36180643 36180647 187727
150 6 11511 36184467 Production 36181019 36181023 187465
151 6 11511 36184467 Production 36182792 36182796 187002
152 6 11511 36184467 Production 36183911 36183915 186509
153 6 11511 36184467 Milk 36185332 36185336 250783
154 6 11511 36184467 Milk 36186103 36186107 103888
155 6 11511 36184467 Milk 36186103 36186107 104752
156 6 11511 36184467 Production 36186581 36186585 188641
157 6 11511 36184467 Production 36186688 36186692 189503
158 6 11511 36184467 Production 36187039 36187043 188875
159 6 11511 36184467 Production 36189076 36189080 188768
160 6 11511 36184467 Production 36191254 36191258 187381
161 6 11511 36184467 Production 36198700 36198704 186480
162 6 11511 36184467 Production 36203596 36203600 187794
163 6 11511 36184467 Milk 36204034 36204038 138157
164 6 11511 36184467 Production 36204034 36204038 186572
165 6 11511 36184467 Milk 36205214 36205218 15002
166 6 11511 36184467 Production 36206851 36206855 188861
167 6 11511 36184467 Production 36210548 36210552 187623
168 6 11511 36184467 Production 36211595 36211599 187645
169 6 11511 36184467 Production 36212184 36212188 189171
170 6 11511 36184467 Production 36212912 36212916 187151
171 6 11511 36184467 Production 36216422 36216426 187483
172 6 11511 36184467 Milk 36218000 36218004 103890
173 6 11511 36184467 Milk 36218000 36218004 104754
174 6 11511 36184467 Milk 36219842 36219846 105620
175 6 11511 36184467 Production 36219842 36219846 186455
176 6 11511 36184467 Production 36221082 36221086 187299
177 6 11511 36184467 Production 36221438 36221442 187378
178 6 11511 36184467 Production 36225616 36225620 186759
179 6 11511 36184467 Production 36226441 36226445 186605
180 6 11511 36184467 Production 36226963 36226967 23701
181 6 11511 36184467 Production 36226963 36226967 67245
182 6 11511 36184467 Production 36226963 36226967 67246
183 6 11511 36184467 Production 36226963 36226967 67247
184 6 11511 36184467 Milk 36226963 36226967 105509
185 6 11511 36184467 Production 36226963 36226967 164069
186 6 11511 36184467 Production 36226963 36226967 164370
187 6 11511 36184467 Production 36226963 36226967 186841
188 6 11511 36184467 Production 36227839 36227843 182676
189 6 11511 36184467 Production 36227839 36227843 188422
190 6 11511 36184467 Production 36232422 36232426 187967
191 6 11511 36184467 Production 36232742 36232746 187367
192 6 11511 36184467 Production 36233671 36233675 188839
193 14 25167 15929822 Milk 15895119 15895123 25606
194 14 25167 15929822 Milk 15895119 15895123 26355
195 14 25167 15929822 Meat_and_Carcass 15895119 15895123 36989
196 14 25167 15929822 Milk 15904094 15904098 104606
197 14 25167 15929822 Milk 15905538 15905542 104607
198 14 25167 15929822 Exterior 15929820 15929824 220989
199 14 25167 15929822 Production 15964283 15964287 122755
200 14 25167 15929822 Production 15964283 15964287 123539
201 14 25167 15929822 Production 15965017 15965021 122756
202 14 25167 15929822 Production 15965017 15965021 123540
203 14 25167 15929822 Production 15967515 15967519 122757
204 14 25167 15929822 Production 15967515 15967519 123541
205 14 25167 15929822 Production 15969384 15969388 122758
206 14 25167 15929822 Production 15969384 15969388 123542
207 14 25167 15929822 Production 15972216 15972220 122759
208 14 25167 15929822 Production 15972216 15972220 123543
209 14 25167 15929822 Production 15977822 15977826 122760
210 14 25167 15929822 Production 15977822 15977826 123544
211 15 26782 33066384 Production 33066382 33066386 68791
212 15 26782 33066384 Meat_and_Carcass 33107568 33107572 228743
213 15 27199 57930281 Milk 57892792 57892796 242339
214 15 27199 57930281 Milk 57914040 57914044 248429
215 15 27199 57930281 Milk 57926776 57926780 242396
216 15 27199 57930281 Milk 57926776 57926780 248428
217 15 27199 57930281 Milk 57949776 57949780 242391
218 17 29742 46487068 Milk 46449873 46449877 162016
219 17 29742 46487068 Milk 46449873 46449877 163641
220 17 29742 46487068 Milk 46452018 46452022 162017
221 17 29742 46487068 Milk 46452018 46452022 163642
222 17 29742 46487068 Milk 46455275 46455279 163230
223 17 29742 46487068 Milk 46455275 46455279 163643
224 17 29742 46487068 Milk 46457135 46457139 163231
225 17 29742 46487068 Milk 46457135 46457139 163644
226 17 29742 46487068 Milk 46460742 46460746 163159
227 17 29742 46487068 Milk 46460742 46460746 163583
228 17 29742 46487068 Milk 46463937 46463941 163232
229 17 29742 46487068 Milk 46463937 46463941 163645
230 17 29742 46487068 Milk 46474328 46474332 162015
231 17 29742 46487068 Milk 46474328 46474332 163640
232 17 29742 46487068 Milk 46474997 46475001 162014
233 17 29742 46487068 Milk 46474997 46475001 163639
234 17 29742 46487068 Milk 46476243 46476247 162013
235 17 29742 46487068 Milk 46476243 46476247 163638
236 17 29742 46487068 Milk 46476816 46476820 162012
237 17 29742 46487068 Milk 46476816 46476820 163637
238 17 29742 46487068 Milk 46480499 46480503 161948
239 17 29742 46487068 Milk 46480499 46480503 162037
240 17 29742 46487068 Milk 46481110 46481114 161941
241 17 29742 46487068 Milk 46481110 46481114 162030
242 17 29742 46487068 Milk 46481709 46481713 162019
243 17 29742 46487068 Milk 46481709 46481713 162100
244 17 29742 46487068 Milk 46482674 46482678 162020
245 17 29742 46487068 Milk 46482674 46482678 162101
246 17 29742 46487068 Milk 46483484 46483488 162021
247 17 29742 46487068 Milk 46483484 46483488 162102
248 17 29742 46487068 Milk 46529284 46529288 163162
249 17 29742 46487068 Milk 46529284 46529288 163584
250 17 29772 47884497 Milk 47837023 47837027 162764
251 17 29772 47884497 Milk 47844960 47844964 162876
252 17 29772 47884497 Milk 47851806 47851810 162921
253 17 29772 47884497 Milk 47855252 47855256 162648
254 17 29772 47884497 Production 47856400 47856404 23869
255 17 29772 47884497 Production 47856400 47856404 68904
256 17 29772 47884497 Exterior 47856400 47856404 102064
257 17 29772 47884497 Exterior 47884495 47884499 102065
258 17 29772 47884497 Milk 47924476 47924480 163030
259 17 29772 47884497 Milk 47924476 47924480 163517
260 17 29772 47884497 Milk 47926919 47926923 162858
261 17 29772 47884497 Milk 47926919 47926923 163422
262 17 29784 48800954 Milk 48759668 48759672 63966
263 20 33665 65351653 Milk 65321359 65321363 176120
264 20 33665 65351653 Milk 65350145 65350149 69962
265 20 33665 65351653 Milk 65352215 65352219 69963
266 20 33665 65351653 Milk 65357974 65357978 69964
267 20 33665 65351653 Milk 65363596 65363600 69626
268 20 33665 65351653 Health 65367232 65367236 167647
269 21 33836 4055731 Reproduction 4074797 4074801 145524
270 21 33836 4055731 Reproduction 4079204 4079208 145458
271 21 33836 4055731 Reproduction 4079247 4079251 145445
272 21 33836 4055731 Reproduction 4085667 4085671 145389
273 21 33836 4055731 Reproduction 4085667 4085671 146710
274 21 33836 4055731 Reproduction 4085817 4085821 146703
275 23 36688 44822126 Milk 44787659 44787663 254748
276 23 36688 44822126 Milk 44789048 44789052 254706
277 23 36688 44822126 Meat_and_Carcass 44807487 44807491 226965
278 23 36688 44822126 Meat_and_Carcass 44807487 44807491 229458
279 23 36688 44822126 Meat_and_Carcass 44807487 44807491 234754
280 23 36688 44822126 Milk 44808987 44808991 254767
281 23 36688 44822126 Milk 44830780 44830784 254833
282 23 36688 44822126 Milk 44834127 44834131 254725
283 23 36688 44822126 Milk 44845256 44845260 254688
284 23 36688 44822126 Milk 44848146 44848150 254689
285 23 36688 44822126 Milk 44859653 44859657 254769
286 24 36876 857728 Exterior 857726 857730 66121
287 24 36876 857728 Reproduction 885598 885602 106889
288 28 40795 20016672 Milk 20016670 20016674 112226
289 28 40795 20016672 Milk 20016670 20016674 118869
290 28 40795 20016672 Milk 20017763 20017767 112227
291 28 40795 20016672 Milk 20017763 20017767 118870
292 28 40795 20016672 Milk 20045011 20045015 118664
293 28 40795 20016672 Milk 20060184 20060188 118665
294 28 41057 35807366 Reproduction 35807364 35807368 138601
295 28 41057 35807366 Reproduction 35807364 35807368 138602
296 29 41785 33039863 Production 33039861 33039865 69450
297 29 41785 33039863 Production 33039861 33039865 69451

QTL type

#plotting the percentage of each QTL class annoatted
oldpar <- par(mar=c(1,15,0.5,1))
plot_qtl_info(out.qtl, qtl_plot = "qtl_type", cex=1.5)

My Image

Production QTLs

#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Production")

My Image

Reproduction QTLs

#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Reproduction")

My Image

Milk QTLs

#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Milk")

My Image

Meat and Carcass QTLs

#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class=""Meat_and_Carcass"")

My Image

Health QTLs

#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Health")

My Image

Exterior QTLs

#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Exterior")

My Image

24.6.1 QTL enrichment on GALLO

#QTL enrichment analysis 
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "Name",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)


# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]

write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_enrich_qtl_genome_name.csv")

out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "QTL_type",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)

sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_enrich_qtl_genome_type.csv")


#Plots

#Name

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")


#Type

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")

QTL Enrichment outcomes

Enrichment by name (enrichment analysis will be performed for each trait individually)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval QTL_type
27 Metabolic body weight 83 4039 292 163224 0.0000000 0.0000000 Production
29 Milk butyric acid content 22 828 292 163224 0.0000000 0.0000000 Milk
30 Milk caproic acid content 18 675 292 163224 0.0000000 0.0000000 Milk
12 Dry matter intake 17 2186 292 163224 0.0000006 0.0000090 Production
2 Aggressive behavior 2 8 292 163224 0.0000887 0.0008719 Exterior
25 Length of productive life 13 2004 292 163224 0.0000815 0.0008719 Production
35 Milk pentadecylic acid content 5 280 292 163224 0.0001640 0.0013823 Milk
33 Milk iron content 4 217 292 163224 0.0006685 0.0049302 Milk
15 Dystocia 2 31 292 163224 0.0014331 0.0090411 Reproduction
16 Fecal larva count 3 125 292 163224 0.0015324 0.0090411 Health
4 Body weight 16 4289 292 163224 0.0050041 0.0268400 Production
14 Duration of inactivity during open field test 1 9 292 163224 0.0159862 0.0785990 Exterior
13 Duration of exploration during novel object test 1 16 292 163224 0.0282437 0.1281828 Exterior
5 Body weight gain 6 1354 292 163224 0.0362258 0.1526660 Production
3 Anti-Müllerian hormone level 1 26 292 163224 0.0454908 0.1789305 Health
46 Polyunsaturated fatty acid content 1 28 292 163224 0.0489034 0.1803314 Meat and Carcass
45 Palmitoleic acid content 1 46 292 163224 0.0790752 0.2744374 Meat and Carcass
42 Muscle sodium content 1 56 292 163224 0.0954231 0.3127758 Meat and Carcass
28 Milk alpha-S1-casein percentage 1 65 292 163224 0.1098888 0.3382449 Milk
43 Myristoleic acid content 1 68 292 163224 0.1146593 0.3382449 Meat and Carcass
39 Milk unglycosylated kappa-casein percentage 7 2351 292 163224 0.1315212 0.3695119 Milk
21 Interdigital hyperplasia 1 93 292 163224 0.1534352 0.4052354 Exterior
41 Multiple birth 1 96 292 163224 0.1579731 0.4052354 Reproduction
19 Gestation length 2 636 292 163224 0.3149170 0.7146195 Reproduction
23 Interval to first estrus after calving 3 1053 292 163224 0.2917668 0.7146195 Reproduction
36 Milk protein percentage 18 8803 292 163224 0.3139794 0.7146195 Milk
8 Calving ease 6 3819 292 163224 0.6804471 0.9778366 Reproduction
10 Conception rate 2 1255 292 163224 0.6577041 0.9778366 Reproduction
17 Feet and leg conformation 1 627 292 163224 0.6752950 0.9778366 Exterior
18 Foot angle 1 672 292 163224 0.7005282 0.9778366 Exterior
22 Interval from first to last insemination 1 445 292 163224 0.5497190 0.9778366 Reproduction
24 Lean meat yield 1 621 292 163224 0.6717742 0.9778366 Meat and Carcass
38 Milk riboflavin content 1 509 292 163224 0.5986072 0.9778366 Milk
48 PTA type 1 627 292 163224 0.6752950 0.9778366 Production
49 Rear leg placement - side view 1 430 292 163224 0.5374280 0.9778366 Exterior
50 Rump width 1 526 292 163224 0.6106790 0.9778366 Production
52 Somatic cell score 2 1122 292 163224 0.5971200 0.9778366 Health
53 Stillbirth 2 1363 292 163224 0.7013705 0.9778366 Reproduction
54 Strength 1 664 292 163224 0.6961897 0.9778366 Exterior
55 Subcutaneous fat thickness 1 331 292 163224 0.4474847 0.9778366 Meat and Carcass
57 Udder attachment 1 655 292 163224 0.6912339 0.9778366 Exterior
58 Udder depth 1 695 292 163224 0.7126605 0.9778366 Exterior
59 Udder height 1 504 292 163224 0.5949862 0.9778366 Exterior
6 Bovine respiratory disease susceptibility 1 789 292 163224 0.7573589 0.9935536 Health
20 Inseminations per conception 1 790 292 163224 0.7577951 0.9935536 Reproduction
1 Age at puberty 1 8222 292 163224 0.9999997 0.9999997 Reproduction
7 Bovine tuberculosis susceptibility 1 1155 292 163224 0.8744991 0.9999997 Health
9 Carcass weight 1 2020 292 163224 0.9737344 0.9999997 Meat and Carcass
11 Connective tissue amount 1 3142 292 163224 0.9965892 0.9999997 Meat and Carcass
26 Marbling score 1 1817 292 163224 0.9620577 0.9999997 Meat and Carcass
31 Milk fat percentage 9 10941 292 163224 0.9979006 0.9999997 Milk
32 Milk fat yield 2 8220 292 163224 0.9999954 0.9999997 Milk
34 Milk kappa-casein percentage 5 4499 292 163224 0.9064431 0.9999997 Milk
37 Milk protein yield 1 3093 292 163224 0.9962701 0.9999997 Milk
40 Milk yield 4 6432 292 163224 0.9970834 0.9999997 Milk
44 Net merit 1 903 292 163224 0.8023701 0.9999997 Production
47 Pregnancy rate 1 944 292 163224 0.8164356 0.9999997 Reproduction
51 Shear force 2 2954 292 163224 0.9692776 0.9999997 Meat and Carcass
56 Tenderness score 4 3483 292 163224 0.8712212 0.9999997 Meat and Carcass

My Image

Enrichment by QTL_type (enrichment processes performed for the QTL classes)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval
1 Exterior 12 9077 292 163224 0.8914062 1
2 Health 8 5889 292 163224 0.8293292 1
3 Meat and Carcass 15 18258 292 163224 0.9998999 1
4 Milk 97 75352 292 163224 0.9999974 1
5 Production 138 19640 292 163224 0.0000000 0
6 Reproduction 22 35008 292 163224 1.0000000 1

My Image

24.7 BLUPF90+ GPROFILER ON-LINE

From the online version of GPROFILER i got the following results.

Legend My Image

My Image

My Image

24.8 BLUPF90+ VEP

My Image

rs110079750 My Image

rs109276211 My Image

rs109631116 My Image

rs42029843 My Image

rs110428837 My Image

rs110081798 My Image

rs110160157 My Image

rs110038841 My Image

rs109868537 My Image

rs110565206 My Image

rs42751504 My Image

rs110121846 My Image

rs110935391 My Image

rs109584097 My Image

rs41585925 My Image

rs110506037 My Image

rs109273103 My Image

rs109575643 My Image

rs43305418 My Image

rs43377276 My Image

rs43691687 My Image

rs41764450 My Image

rs42305073 No

rs42462935 No

rs42752353 My Image

rs81154019 No

rs41645754 My Image

rs41566051 My Image

rs41636137 My Image

rs41615935 My Image

rs29026778 My Image

rs29013980 My Image

rs29023486 My Image

rs29027498 My Image

rs29017061 My Image

rs29018741 My Image

rs29013301 My Image

rs29012492 My Image

It is interesting that 3 significant SNPs falled in the same gene CSMD2 on chromosome 3 rs109868537 rs42751504 rs42752353

My Image

My Image

For the gene CNTNAP2 on chromosome 4 also have 2 significant SNPs rs41566051 rs29012492

My Image

This code will generate several files, among them:
  • chrsnp_pval: which has the -log10(pvalue) the 3rd column
  • Pft1e3.R: a r-code to generate the Manhattan plot in R

24.8.0.1 Manhattan Plots for BLUPF90 GWAS

24.8.0.1.1 Genome Independent Segment

To make the Manhattan Plot considering Genome Independent Segment we should run the code bellow. This code has part of the code in the file Pft1e3.R

Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")

library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
  filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
  summarise(total_length = sum(`Seq length`)) %>%
  pull(total_length)

# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8

# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)

NeL <- Ne*L_M

# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)


# Calculate Bonferroni threshold (already done)
bonf <- -log10(0.05 / Me)


setwd("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO")
# Read in and process data for Manhattan plot
yyy1 <- read.table("chrsnp_pval")
yyy <- yyy1[order(yyy1$V4), ]
zzz <- yyy[which(yyy$V1 == 1 & yyy$V2 == 3), ]
n <- nrow(zzz)
y <- zzz[, 4]
x <- zzz[, 3]
chr1 <- zzz[, 5]
chr <- NULL
pos <- NULL

for (i in unique(yyy$V5)) {
  zz <- yyy[yyy$V5 == i, ]
  key <- zz$V4
  medio <- round(nrow(zz) / 2, 0)
  z <- key[medio]
  pos <- c(pos, z)
}

chrn <- unique(yyy$V5)
one <- which(chr1 %% 4 == 0)
two <- which(chr1 %% 4 == 1)
three <- which(chr1 %% 4 == 2)
four <- which(chr1 %% 4 == 3)
chr[one] <- "darkgoldenrod"
chr[two] <- "darkorchid"
chr[three] <- "blue"
chr[four] <- "forestgreen"

# Create Manhattan plot with Bonferroni line and legend
pdf(file = "Pft1e3_manplot_with_bonf_ind_seg.pdf", family = "sans", height = 27.8, width = 50, pointsize = 20, bg = "white")
par(mfrow = c(1, 1), family = "sans", cex = 1.5, font = 2)
plot(y, x, xaxt = "n", main = "Manhattan Plot SNP p_value - Trait: 1 Effect: 3", xlab = "", ylab = "-log10(p-value)", pch = 20, xlim = c(1, n), ylim = c(0, max(x)), col = chr)

# Add Bonferroni line
abline(h = bonf, col = "red", lwd = 2, lty = 2)

# Add legend for Bonferroni line
legend("topright", legend = "Bonferroni correction for genome independent segments", col = "red", lwd = 2, lty = 2, cex = 1)

axis(1, at = pos, labels = chrn)
dev.off()

My Image

For additional analysis like Variant Effect Prediction (VEP) we need the rsID, to get the rsID we use the software SNPChimp which requires SNP_names, but the BLUPF90 output have only the Chromosome and Position of the SNPs, so we are going to perfome these two steps to get one file with t he significant SNPs + SNP_name + rsID

24.8.1 Results

We have 2 different output files

  1. Variance components: blupf90.log

My Image

In this file we can find the heritabilit (SD) and for instance the convergence (similarity)

  1. Solutions: solutions.orig
    In this file we will find the solutions (results) for each effect

In our example:

  • EFFECT 1: Birth Year, has 4 levels (2018, 2019, 2020 and 2021), and the solution that for this fixed effect is how much each level add.
  • EFFECT 2: Sampling date, has 23 levels, and the solutions
  • EFFECT 3: Animal random effect, has one for each animal and it is the EBV or GEBV.

    My Image

24.8.2 Variance components estimation with the whole sample

We’LL estimate the GRM on GCTA fro all 252 animals.

First I will need to build a

24.8.3 GWAS

Now we’ll estimate the Variance Components for all 252 animals on BLUPF90+ With the Variance Components Estimations we’ll perform a GWAS on BLUPF90+ only for the animals with Extreme Phenotypes.

24.8.4 Corrected Bonferroni for genome independent segments

Now we are going to apply a correction for multiple testing modifying the Bonferroni test adjusting not for the total number of SNPs but for the number of the independent segments in the genome.

Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")

library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
  filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
  summarise(total_length = sum(`Seq length`)) %>%
  pull(total_length)

# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8

# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)

NeL <- Ne*L_M

# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)


gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/GWAS_result_NO_CONSTRAIN.loco.mlma", 
                  head=T, stringsAsFactors = F)


gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}

bonf<- -log10(0.15/Me)

library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20, 
               cex=1, lwd=1, ylab="", xlab="Chromossomes", 
               main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf), col="red2")
legend("topright", col="red2", lwd=2, c("Bonferroni corr. for ind. segments"), bty="n")


library(tidyverse)

SNP_sig_bonf_ind_seg <- filter(gwas, logP > bonf)
write.table(SNP_sig_bonf_ind_seg, file = "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/SNP_sig15_Bonf_ind_seg.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

write.csv(SNP_sig_bonf_ind_seg, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/SNP_sig15_Bonf_ind_seg.txt")

The SNPs below are the significant after the correction above

X Chr SNP bp A1 A2 Freq b se p logP
1 3 BTB-00148619 105840426 A C 0.321429 410.739 104.530 0.0000852 4.069736
2 3 ARS-BFGL-NGS-118207 110866523 A C 0.421429 402.730 103.634 0.0001019 3.991928
3 22 ARS-BFGL-NGS-23411 5134078 A C 0.214286 415.637 109.100 0.0001391 3.856526

My Image

Bellow we inserted the rsID searched at SNPCHIMP

X Chr rsID SNP bp A1 A2 Freq b se p logP
1 3 rs43357861 BTB-00148619 105840426 A C 0.321429 410.739 104.530 0.0000852 4.069736
2 3 rs110081798 ARS-BFGL-NGS-118207 110866523 A C 0.421429 402.730 103.634 0.0001019 3.991928
3 22 rs110031217 ARS-BFGL-NGS-23411 5134078 A C 0.214286 415.637 109.100 0.0001391 3.856526

24.8.5 GALLO

# GALLO

#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")

#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")

#import MARKER files = the GWAS output
gwas <- read.csv(file = "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/SNP_sig15_Bonf_ind_seg.csv")

# Assuming "gwas" is your dataframe
gwas <- subset(gwas, select = c(Chr, SNP, bp))


colnames(gwas) <- c("CHR","SNP", "BP")


#FINDING GENES AND QTLs ARROUND THE MARKER

#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2, 
                                            marker_file= gwas, 
                                            method = "gene",
                                            marker = "snp", 
                                            interval = 50000, 
                                            nThreads = NULL)

write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_genes_extr_15.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_genes_extr_15.csv")

#FINDING QTLs

out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2, 
                                          marker_file= gwas, 
                                          method = "qtl",
                                          marker = "snp", 
                                          interval = 50000, 
                                          nThreads = NULL)


write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_qtl_extr_15.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_qtl_extr_15.csv")

The GALLO output are bellow:

For GENES

X CHR SNP BP chr start_pos end_pos width strand gene_id gene_name gene_biotype
1 3 BTB-00148619 105840426 3 105807211 105810027 2817 + ENSBTAG00000020622 COL9A2 lncRNA
2 3 BTB-00148619 105840426 3 105824376 105867157 42782 - ENSBTAG00000044178 ZMPSTE24 protein_coding
3 3 BTB-00148619 105840426 3 105884724 105887360 2637 - ENSBTAG00000019698 TMCO2 protein_coding
4 3 ARS-BFGL-NGS-118207 110866523 3 110769753 110827938 58186 + ENSBTAG00000013867 DLGAP3 protein_coding
5 3 ARS-BFGL-NGS-118207 110866523 3 110835276 110839536 4261 + ENSBTAG00000014235 SMIM12 protein_coding
6 3 ARS-BFGL-NGS-118207 110866523 3 110888198 110890904 2707 - ENSBTAG00000013881 GJA4 protein_coding
7 3 ARS-BFGL-NGS-118207 110866523 3 110900183 110905849 5667 - ENSBTAG00000012584 GJB3 protein_coding
8 22 ARS-BFGL-NGS-23411 5134078 22 5091037 5184321 93285 + ENSBTAG00000019832 TGFBR2 protein_coding

FOR QTLs

X CHR SNP BP QTL_type start_pos end_pos QTL_ID
1 3 BTB-00148619 105840426 Production 105835011 105835015 287197
2 3 ARS-BFGL-NGS-118207 110866523 Meat_and_Carcass 110866521 110866525 152332
3 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5109498 5109502 151609
4 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5134076 5134080 151620
5 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5173197 5173201 151601

24.8.5.1 QTL enrichment on GALLO

#QTL enrichment analysis 
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "Name",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)


# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]

write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_enrich_qtl_genome_name_extr_15.csv")

out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "QTL_type",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)

sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_enrich_qtl_genome_type_extr_15.csv")


#Plots

#Name

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")


#Type

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")

QTL Enrichment outcomes

Enrichment by name (enrichment analysis will be performed for each trait individually)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval QTL_type
2 Muscle carnosine content 3 67 5 163224 0.0000000 0.0000000 Meat and Carcass
3 Polyunsaturated fatty acid content 1 28 5 163224 0.0008574 0.0012861 Meat and Carcass
1 Body depth 1 616 5 163224 0.0187281 0.0187281 Production

My Image

Enrichment by QTL_type (enrichment processes performed for the QTL classes)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval
1 Meat and Carcass 4 18258 5 163224 0.0007125 0.0014251
2 Production 1 19640 5 163224 0.4732476 0.4732476

My Image

24.8.6 gPROFILER online

From the online version of GPROFILER i got the following results.

Legend My Image

My Image

My Image

25 GWAS on GCTA

pheno <- read.table("/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", header = T)
data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv", header = T)
ids_eq <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv", header = T)


# Create data_final$cdn_id by matching IDs with elora_id
data_final$cdn_id <- ids_eq$cdn_id[match(data_final$ID, ids_eq$elora_id)]
fixeff <- data_final[,c("ID", "BIRTH_YEAR", "Sampling_date", "cdn_id")]
fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id, ]
fixeff$FID <- "HO"
fixeff <- fixeff[, c("FID", "cdn_id", "BIRTH_YEAR", "Sampling_date")]

identical(fixeff$cdn_id, pheno$cdn_id)

write.table(fixeff, "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/fixeff.txt", quote = F, row.names = F, col.names = T)
#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/GWAS_result
PHENO=/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt
FIXEFF=/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/fixeff.txt

/home/local/bin/gcta \
    --bfile ${DATA} \
    --mlma-loco \
    --pheno ${PHENO} \
    --qcovar ${FIXEFF} \
    --autosome-num 29 \
    --autosome \
    --out ${RESULT}

25.1 Corrected Bonferroni for genome independent segments

Now we are going to apply a correction for multiple testing modifying the Bonferroni test adjusting not for the total number of SNPs but for the number of the independent segments in the genome.

{r eval=FALSE}
Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")

library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
  filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
  summarise(total_length = sum(`Seq length`)) %>%
  pull(total_length)

# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8

# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)

NeL <- Ne*L_M

# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)


gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/GWAS_result.loco.mlma", 
                  head=T, stringsAsFactors = F)

gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}

bonf<- -log10(0.05/Me)

library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20, 
               cex=1, lwd=1, ylab="", xlab="Chromossomes", 
               main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf), col="red2")
legend("topleft", col="red2", lwd=2, c("Bonferroni corr. for ind. segments"), bty="n")


library(tidyverse)

SNP_sig_bonf_ind_seg <- filter(gwas, logP > bonf)
write.table(SNP_sig_bonf_ind_seg, file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/SNP_sig_Bonf_ind_seg.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

write.csv(SNP_sig_bonf_ind_seg, "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/SNP_sig_Bonf_ind_seg2.csv")

The SNPs below are the significant after the correction above

X Chr SNP bp A1 A2 Freq b se p logP
1 22 ARS-BFGL-NGS-23411 5134078 A C 0.1872510 196.185 43.9934 8.20e-06 5.085250
2 22 ARS-BFGL-NGS-26419 46052082 A C 0.0816733 295.245 66.9504 1.03e-05 4.985404
3 26 BTB-00926636 17857480 C A 0.0258964 491.154 112.4840 1.26e-05 4.898635
4 26 BTA-62125-no-rs 17899619 C A 0.0258964 491.154 112.4840 1.26e-05 4.898635
5 28 BTA-99379-no-rs 41666186 A C 0.0498008 394.376 83.4532 2.30e-06 5.639587

My Image

Bellow we inserted the rsID searched at SNPCHIMP

X Chr rsID SNP bp A1 A2 Freq b se p logP
1 22 rs110031217 ARS-BFGL-NGS-23411 5134078 A C 0.1872510 196.185 43.9934 8.20e-06 5.085250
2 22 rs110991998 ARS-BFGL-NGS-26419 46052082 A C 0.0816733 295.245 66.9504 1.03e-05 4.985404
3 26 rs42089058 BTB-00926636 17857480 C A 0.0258964 491.154 112.4840 1.26e-05 4.898635
4 26 rs41644634 BTA-62125-no-rs 17899619 C A 0.0258964 491.154 112.4840 1.26e-05 4.898635
5 28 rs41567074 BTA-99379-no-rs 41666186 A C 0.0498008 394.376 83.4532 2.30e-06 5.639587

25.1.1 Option 2: Manhattan Plot

gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/GWAS_result.loco.mlma", 
                  head=T, stringsAsFactors = F)
# Prepare the dataset
don <- gwas %>% 
  group_by(Chr) %>% 
  summarise(chr_len = max(bp)) %>% 
  mutate(tot = cumsum(as.numeric(chr_len)) - as.numeric(chr_len)) %>%
  select(-chr_len) %>%
  left_join(gwas, ., by = c("Chr" = "Chr")) %>%
  arrange(Chr, bp) %>%
  mutate(BPcum = as.numeric(bp) + tot) %>%
  mutate(is_highlight = ifelse(logP > bonf, "yes", "no")) %>%
  mutate(is_annotate = ifelse(logP > bonf, "yes", "no"))

# Remove rows with missing BPcum values
don <- don %>% filter(!is.na(BPcum))

# Prepare X axis
axisdf <- don %>% 
  group_by(Chr) %>% 
  summarize(center = (max(BPcum) + min(BPcum)) / 2)

# Find the maximum logP value for setting y-axis limits
max_logP <- max(don$logP, na.rm = TRUE)

# Make the plot
ggplot(don, aes(x = BPcum, y = logP)) +
  geom_point(aes(color = as.factor(Chr)), alpha = 0.8, size = 1.3) +
  scale_color_manual(values = rep(c("grey", "skyblue"), 22)) +
  scale_x_continuous(label = axisdf$Chr, breaks = axisdf$center) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, max_logP + 1)) + # Set y-axis limit
  geom_point(data = subset(don, is_highlight == "yes"), color = "orange", size = 2) +
  geom_text_repel(data = subset(don, is_annotate == "yes"), aes(label = SNP), size = 2) +
  theme_bw() +
  theme(
    legend.position = "none",
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  ) +
  labs(title = "GWAS Cortisol", x = "Chromosomes", y = "-log10(p)")

My Image

26 VEP

After insert all rsID on VEP the summary is shown below:

My Image

As can be seen, the VEP only analysed 14 variants

Bellow I’m gonna show the “genome view” for each SNP (variant) recovered from VEP:

rs110031217 My Image

rs110991998 My Image

rs42089058 My Image

rs41644634 My Image

rs41567074 My Image

ps. the unique reference genome available in the VEP is the ARS-UCD1.3, which is not a problem once it is working with the rsID, which for sure is our variant.

27 GALLO

# GALLO

#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")

#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")

#import MARKER files = the GWAS output
gwas <- read.csv(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/SNP_sig_Bonf_ind_seg.csv")

# Assuming "gwas" is your dataframe
gwas <- subset(gwas, select = c(Chr, SNP, bp))


colnames(gwas) <- c("CHR","SNP", "BP")


#FINDING GENES AND QTLs ARROUND THE MARKER

#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2, 
                                            marker_file= gwas, 
                                            method = "gene",
                                            marker = "snp", 
                                            interval = 50000, 
                                            nThreads = NULL)

write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_genes2.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_genes2.csv")

#FINDING QTLs

out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2, 
                                          marker_file= gwas, 
                                          method = "qtl",
                                          marker = "snp", 
                                          interval = 50000, 
                                          nThreads = NULL)


write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_qtl2.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_qtl_clean2.csv")

Dowloading the .gtf file from Ensembl https://useast.ensembl.org/info/data/ftp/index.html

Downloading the .gff file from AnimalQTLdb https://www.animalgenome.org/cgi-bin/QTLdb/index

The GALLO output are bellow:

For GENES

X CHR SNP BP chr start_pos end_pos width strand gene_id gene_name gene_biotype
1 22 ARS-BFGL-NGS-26419 46052082 22 45924535 46818511 893977 - ENSBTAG00000013117 CACNA2D3 protein_coding
2 22 ARS-BFGL-NGS-26419 46052082 22 46051998 46062657 10660 + ENSBTAG00000013124 LRTM1 protein_coding
3 22 ARS-BFGL-NGS-23411 5134078 22 5091037 5184321 93285 + ENSBTAG00000019832 TGFBR2 protein_coding
4 26 BTB-00926636 17857480 26 17703169 17846407 143239 - ENSBTAG00000011743 TLL2 protein_coding
5 26 BTB-00926636 17857480 26 17851539 17912665 61127 - ENSBTAG00000016298 TM9SF3 protein_coding
6 26 BTA-62125-no-rs 17899619 26 17851539 17912665 61127 - ENSBTAG00000016298 TM9SF3 protein_coding
7 26 BTA-62125-no-rs 17899619 26 17925643 18029878 104236 - ENSBTAG00000019872 PIK3AP1 protein_coding
8 28 BTA-99379-no-rs 41666186 28 41609015 41649016 40002 - ENSBTAG00000007540 GLUD1 protein_coding
9 28 BTA-99379-no-rs 41666186 28 41650333 41763371 113039 + ENSBTAG00000019194 SHLD2 protein_coding

FOR QTLs

X CHR SNP BP QTL_type start_pos end_pos QTL_ID
1 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5109498 5109502 151609
2 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5134076 5134080 151620
3 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5173197 5173201 151601
4 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51802
5 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51803
6 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51804
7 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51805
8 22 ARS-BFGL-NGS-26419 46052082 Production 46052080 46052084 51806
9 22 ARS-BFGL-NGS-26419 46052082 Production 46052080 46052084 51807
10 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51808
11 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51809
12 22 ARS-BFGL-NGS-26419 46052082 Health 46052080 46052084 51810
13 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51811
14 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51812
15 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51813
16 22 ARS-BFGL-NGS-26419 46052082 Milk 46073517 46073521 243365
17 26 BTB-00926636 17857480 Milk 17809768 17809772 195264
18 26 BTB-00926636 17857480 Milk 17809768 17809772 200269
19 26 BTB-00926636 17857480 Milk 17809768 17809772 200270
20 26 BTB-00926636 17857480 Milk 17809768 17809772 205295
21 26 BTB-00926636 17857480 Milk 17810363 17810367 198859
22 26 BTB-00926636 17857480 Milk 17810363 17810367 203868
23 26 BTB-00926636 17857480 Milk 17810363 17810367 203869
24 26 BTB-00926636 17857480 Milk 17810363 17810367 210297
25 26 BTB-00926636 17857480 Milk 17811021 17811025 198858
26 26 BTB-00926636 17857480 Milk 17811021 17811025 203866
27 26 BTB-00926636 17857480 Milk 17811021 17811025 203867
28 26 BTB-00926636 17857480 Milk 17811021 17811025 210296
29 26 BTB-00926636 17857480 Milk 17811725 17811729 197493
30 26 BTB-00926636 17857480 Milk 17811725 17811729 202704
31 26 BTB-00926636 17857480 Milk 17812273 17812277 195685
32 26 BTB-00926636 17857480 Milk 17812273 17812277 200785
33 26 BTB-00926636 17857480 Milk 17813212 17813216 195285
34 26 BTB-00926636 17857480 Milk 17813212 17813216 200301
35 26 BTB-00926636 17857480 Milk 17813978 17813982 196236
36 26 BTB-00926636 17857480 Milk 17813978 17813982 201468
37 26 BTB-00926636 17857480 Milk 17815237 17815241 198002
38 26 BTB-00926636 17857480 Milk 17815237 17815241 203190
39 26 BTB-00926636 17857480 Milk 17817223 17817227 34750
40 26 BTB-00926636 17857480 Milk 17817223 17817227 195586
41 26 BTB-00926636 17857480 Milk 17821297 17821301 197235
42 26 BTB-00926636 17857480 Milk 17821297 17821301 202455
43 26 BTB-00926636 17857480 Milk 17822209 17822213 197783
44 26 BTB-00926636 17857480 Milk 17822209 17822213 202983
45 26 BTB-00926636 17857480 Milk 17823105 17823109 196204
46 26 BTB-00926636 17857480 Milk 17823105 17823109 201419
47 26 BTB-00926636 17857480 Milk 17824770 17824774 196867
48 26 BTB-00926636 17857480 Milk 17824770 17824774 202101
49 26 BTB-00926636 17857480 Milk 17833551 17833555 195748
50 26 BTB-00926636 17857480 Milk 17833551 17833555 200855
51 26 BTB-00926636 17857480 Milk 17833551 17833555 206213
52 26 BTB-00926636 17857480 Milk 17865069 17865073 197144
53 26 BTA-62125-no-rs 17899619 Milk 17865069 17865073 197144
54 26 BTB-00926636 17857480 Milk 17865069 17865073 202367
55 26 BTA-62125-no-rs 17899619 Milk 17865069 17865073 202367
56 26 BTB-00926636 17857480 Milk 17865069 17865073 208485
57 26 BTA-62125-no-rs 17899619 Milk 17865069 17865073 208485
58 26 BTB-00926636 17857480 Milk 17871201 17871205 197611
59 26 BTA-62125-no-rs 17899619 Milk 17871201 17871205 197611
60 26 BTB-00926636 17857480 Milk 17871201 17871205 202816
61 26 BTA-62125-no-rs 17899619 Milk 17871201 17871205 202816
62 26 BTB-00926636 17857480 Milk 17871201 17871205 209076
63 26 BTA-62125-no-rs 17899619 Milk 17871201 17871205 209076
64 26 BTB-00926636 17857480 Milk 17885871 17885875 63656
65 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 63656
66 26 BTB-00926636 17857480 Milk 17885871 17885875 199068
67 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 199068
68 26 BTB-00926636 17857480 Milk 17885871 17885875 204113
69 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 204113
70 26 BTB-00926636 17857480 Milk 17885871 17885875 204114
71 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 204114
72 26 BTB-00926636 17857480 Milk 17885871 17885875 210399
73 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 210399
74 26 BTB-00926636 17857480 Milk 17895273 17895277 63657
75 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 63657
76 26 BTB-00926636 17857480 Milk 17895273 17895277 199069
77 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 199069
78 26 BTB-00926636 17857480 Milk 17895273 17895277 204115
79 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 204115
80 26 BTB-00926636 17857480 Milk 17895273 17895277 204116
81 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 204116
82 26 BTB-00926636 17857480 Milk 17895273 17895277 210400
83 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 210400
84 26 BTB-00926636 17857480 Milk 17895803 17895807 199070
85 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 199070
86 26 BTB-00926636 17857480 Milk 17895803 17895807 204117
87 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 204117
88 26 BTB-00926636 17857480 Milk 17895803 17895807 204118
89 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 204118
90 26 BTB-00926636 17857480 Milk 17895803 17895807 210401
91 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 210401
92 26 BTB-00926636 17857480 Milk 17900304 17900308 199071
93 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 199071
94 26 BTB-00926636 17857480 Milk 17900304 17900308 204119
95 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 204119
96 26 BTB-00926636 17857480 Milk 17900304 17900308 204120
97 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 204120
98 26 BTB-00926636 17857480 Milk 17900304 17900308 210402
99 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 210402
100 26 BTB-00926636 17857480 Milk 17902932 17902936 63658
101 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 63658
102 26 BTB-00926636 17857480 Milk 17902932 17902936 199072
103 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 199072
104 26 BTB-00926636 17857480 Milk 17902932 17902936 204121
105 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 204121
106 26 BTB-00926636 17857480 Milk 17902932 17902936 204122
107 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 204122
108 26 BTB-00926636 17857480 Milk 17902932 17902936 210403
109 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 210403
110 26 BTB-00926636 17857480 Milk 17906861 17906865 63660
111 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 63660
112 26 BTB-00926636 17857480 Milk 17906861 17906865 199074
113 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 199074
114 26 BTB-00926636 17857480 Milk 17906861 17906865 204125
115 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 204125
116 26 BTB-00926636 17857480 Milk 17906861 17906865 204126
117 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 204126
118 26 BTB-00926636 17857480 Milk 17906861 17906865 210405
119 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 210405
120 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 63654
121 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 199076
122 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 204129
123 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 204130
124 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 210407
125 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 63655
126 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 197826
127 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 203026
128 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 203027
129 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 209354
130 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 195860
131 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 201008
132 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 201009
133 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 206462
134 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 33984
135 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 195668
136 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 195669
137 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 195670
138 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 200758
139 26 BTA-62125-no-rs 17899619 Milk 17931209 17931213 199092
140 26 BTA-62125-no-rs 17899619 Milk 17931209 17931213 204151
141 28 BTA-99379-no-rs 41666186 Reproduction 41646434 41646438 107048
142 28 BTA-99379-no-rs 41666186 Reproduction 41646434 41646438 107227

27.1 QTL annotation on GALLO

QTL type

#GALLO
par(mar=c(8,20,8,8))
plot_qtl_info(out.qtl, qtl_plot = "qtl_type", cex=1)

My Image

QTL type

#GALLO
par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Reproduction")

par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Health")

Reproduction My Image

Health My Image

27.2 QTL enrichment on GALLO

#QTL enrichment analysis 
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "Name",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)


# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]

write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_enrich_qtl_genome_name_2.csv")

out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "QTL_type",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)

sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_enrich_qtl_genome_type_2.csv")


#Plots

#Name

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")


#Type

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")

QTL Enrichment outcomes

Enrichment by name (enrichment analysis will be performed for each trait individually)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval QTL_type
5 Milk C14 index 35 4437 108 163224 0.0000000 0.0000000 Milk
9 Milk myristoleic acid content 26 3047 108 163224 0.0000000 0.0000000 Milk
10 Milk palmitoleic acid content 15 2422 108 163224 0.0000000 0.0000000 Milk
6 Milk C16 index 12 2002 108 163224 0.0000000 0.0000001 Milk
12 Muscle carnosine content 3 67 108 163224 0.0000131 0.0000497 Meat and Carcass
14 Pregnancy rate 2 944 108 163224 0.1296627 0.4105985 Reproduction
18 Teat length 1 300 108 163224 0.1802436 0.4892327 Exterior
15 Rear leg placement - side view 1 430 108 163224 0.2479752 0.5235032 Exterior
17 Stillbirth 2 1363 108 163224 0.2280294 0.5235032 Reproduction
3 Foot angle 1 672 108 163224 0.3596272 0.6162877 Exterior
7 Milk capric acid content 1 912 108 163224 0.4541067 0.6162877 Milk
8 Milk myristic acid content 1 902 108 163224 0.4504612 0.6162877 Milk
13 Net merit 1 903 108 163224 0.4508269 0.6162877 Production
19 Udder depth 1 695 108 163224 0.3693423 0.6162877 Exterior
16 Somatic cell score 1 1122 108 163224 0.5253603 0.6654564 Health
2 Conception rate 1 1255 108 163224 0.5656375 0.6716945 Reproduction
1 Calving ease 2 3819 108 163224 0.7219243 0.7776744 Reproduction
4 Length of productive life 1 2004 108 163224 0.7367441 0.7776744 Production
11 Milk yield 1 6432 108 163224 0.9870080 0.9870080 Milk

My Image

Enrichment by QTL_type (enrichment processes performed for the QTL classes)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval
1 Exterior 4 9077 108 163224 0.8569775 0.9999953
2 Health 1 5889 108 163224 0.9811250 0.9999953
3 Meat and Carcass 3 18258 108 163224 0.9997109 0.9999953
4 Milk 91 75352 108 163224 0.0000000 0.0000000
5 Production 2 19640 108 163224 0.9999848 0.9999953
6 Reproduction 7 35008 108 163224 0.9999953 0.9999953

My Image

28 GPROFILER ON-LINE

From the online version of GPROFILER i got the following results.

Legend My Image

My Image

My Image

29 Genetic Correlation

To assess the correlation between Cortisol phenotypes and Genomic Estimated Breeding Values (GEBVs), we opt for a linear regression instead of a standard correlation test. This decision is driven by the non-normal distribution of our Cortisol phenotypes, which violates the assumptions required for traditional correlation tests.

Linear regression offers a robust alternative as it does not necessitate normality for the dependent variable. By regressing GEBVs over Cortisol, we can model the relationship between these variables. Our aim is to estimate the regression coefficient, which serves as our correlation estimate.

Due to the violation of normality assumptions for the dependent variable (Cortisol), traditional correlation tests may not provide reliable results, particularly in assessing the significance of the correlation. Therefore, alternative approaches, such as linear regression, are preferred as they do not require the same assumptions about the distribution of the dependent variable. By using linear regression, we can still assess the relationship between Cortisol and GEBVs while accommodating the non-normality of Cortisol phenotypes.

The regression model can be represented as follows: \[ y = \beta_0 + \beta_1 \times GEBV_{\text{Milk}} + \epsilon \]

Where:

This approach enables us to quantify the relationship between Cortisol and GEBVs, addressing the non-normality of Cortisol phenotypes while allowing for formal hypothesis testing of the correlation’s significance.

29.1 Data preparation

The first data I received from Lucas had only 135 animals out of 260 with values the other 125 had only NA I shown this to Lucas Lucas wrote to Alisson Lucas sent me the missing animals I merged this two files

rm(list = ls())

# Load the necessary library
library(dplyr)
library(tidyverse)

cortisol_260 <- read.csv("/home/bambrozi/2_CORTISOL/Data/data_clean.csv")

#This is the first dataframe with information for 135 animals and 125 NA
GEBVs1 <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebvs_elora.csv")
#This is the second file with information for the 125 NA animals
GEBVs2 <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/elora_missing_females_2404_06_11_2024.csv")
#This are de columns we can use because we know the meaning of the acronyms
GEBVs_to_use <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebv_names_lucas_06102024_BAG.csv")


sum(is.na(GEBVs1$MILK))
GEBVs1<- GEBVs1[which(is.na(GEBVs1[,"DHI_BARN_NAME"]) == F),]

sum(!is.na(GEBVs2$MILK))
GEBVs2<- GEBVs2[which(is.na(GEBVs2[,"DHI_BARN_NAME"]) == F),]

print(GEBVs1$DHI_BARN_NAME)
print(GEBVs2$DHI_BARN_NAME)

# Making the two dataframes with the same columns
# Remove elora_id and international_id from GEBVs1
GEBVs1 <- GEBVs1 %>% select(-elora_id, -international_id)

# Remove ANIMAL_ID from GEBVs2
GEBVs2 <- GEBVs2 %>% select(-ANIMAL_ID)

# Check if the two dataframes have the same columns
have_same_columns <- all(names(GEBVs1) == names(GEBVs2))

if (have_same_columns) {
  print("The dataframes have the same columns.")
} else {
  print("The dataframes do not have the same columns.")
}


# Check if the column names are in the same order
same_order <- identical(names(GEBVs1), names(GEBVs2))

if (same_order) {
  print("The columns are in the same order.")
} else {
  print("The columns are not in the same order.")
}

GEBVs_combined <- rbind(GEBVs1, GEBVs2)

# Sort the columns
sorted_cortisol_260 <- sort(cortisol_260$ID)
sorted_GEBVs_combined <- sort(GEBVs_combined$DHI_BARN_NAME)

# Check if the sorted columns have the same values
identical(sorted_cortisol_260, sorted_GEBVs_combined)

# Create a duplicate of the column 'DHI_BARN_NAME' and name it 'elora_id'
GEBVs_combined$elora_id <- GEBVs_combined$DHI_BARN_NAME

# Assuming GEBVs_combined is your data frame
GEBVs_combined <- GEBVs_combined %>%
  select(elora_id, DHI_BARN_NAME, everything())

write.csv(GEBVs_combined, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebvs_elora_complete.csv")

# Merging the dataframe with Cortisol values, with the dataframe with GEBVs values
Merg_Cort_GEBVs <- merge(cortisol_260, GEBVs_combined, by.x = "ID", by.y = "elora_id")

write.csv(Merg_Cort_GEBVs, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/Merged_Cortisol_GEBVs.csv")

#Opening the file with the GEBVs columns to use
Columns_to_use <- readLines("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/traits_to_use.txt")

colnames(Merg_Cort_GEBVs)[405] <- "IDD"

data <- select(Merg_Cort_GEBVs, ID, T4Cortisol, BIRTH_YEAR, all_of(Columns_to_use))

# The data below has the the 55 GEBVs + Cortisol data + Birth Year
write.csv(data, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits.csv")

samp_date2 <- read.csv("/home/bambrozi/2_CORTISOL/Data/Elora animal_ids_kl_sampling_date.csv")

# Convert Sampling_date to Date using as.Date
samp_date$Sampling_date <- as.Date(samp_date$Sampling_date, format = "%m/%d/%Y")

table(samp_date$Sampling_date)

samp_date <- select(samp_date, Elora_id, Sampling_date)

# Check if data$ID and samp_dates$elora_id are identical in values and order
identical(data$ID, samp_date$Elora_id)

data_final <- merge(data, samp_date, by.x="ID", by.y="Elora_id")

data_final <- data_final %>%
  select(ID, T4Cortisol, BIRTH_YEAR, Sampling_date, everything())

# The data below has the the 55 GEBVs + Cortisol data + Birth Year + Sampling data
write.csv(data_final, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv")

ps. I double checked by hand the select and merge process against the original tables received and is everything ok.

29.2 Correlations - Linear Regression

29.2.1 Test with one trait

# Fit the linear regression model
model <- lm(T4Cortisol ~ MILK, data = data)

# Summarize the model to get the regression coefficients and statistical summary
model_summary <- summary(model)

# Extract the desired statistics
multiple_r_squared <- model_summary$r.squared
adjusted_r_squared <- model_summary$adj.r.squared
f_statistic <- model_summary$fstatistic[1]
p_value <- pf(model_summary$fstatistic[1], model_summary$fstatistic[2], model_summary$fstatistic[3], lower.tail = FALSE)

# Combine the statistics into a data frame
results <- data.frame(
  Multiple_R_Squared = multiple_r_squared,
  Adjusted_R_Squared = adjusted_r_squared,
  F_Statistic = f_statistic,
  P_Value = p_value
)

# Save the results to a CSV file
write.csv(results, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/regression_summary_milk.csv", row.names = FALSE)

# Plot the linear regression between T4Cortisol and MILK
ggplot(data, aes(x = MILK, y = T4Cortisol)) +
  geom_point() +  # Add points
  geom_smooth(method = "lm", se = FALSE, color = "blue") +  # Add linear regression line
  labs(title = "Linear Regression of T4Cortisol on MILK",
       x = "MILK",
       y = "T4Cortisol") +
  theme_minimal()

Plot linear regression Cortisol vs. Milk GEBV My Image

Summary statistics

Multiple_R_Squared Adjusted_R_Squared F_Statistic P_Value
5.2e-05 -0.0038237 0.0134257 0.9078463

29.2.2 Loop for all traits

As we have 54 GEBVs to fit a linear regression we designed a loop:

# Initialize a list to store the results
results_list <- list()

# Loop through columns 4 to ncol(data) for the GEBVs
for (i in 4:ncol(data)) {
  trait_name <- colnames(data)[i]
  
  # Fit the linear regression model
  model <- lm(data[[2]] ~ data[[i]], data = data)
  
  # Summarize the model
  model_summary <- summary(model)
  
  # Extract the desired statistics
  multiple_r_squared <- model_summary$r.squared
  adjusted_r_squared <- model_summary$adj.r.squared
  f_statistic <- model_summary$fstatistic[1] # F-statistic value
  f_num_df <- model_summary$fstatistic[2] # Numerator degrees of freedom
  f_den_df <- model_summary$fstatistic[3] # Denominator degrees of freedom
  p_value <- pf(f_statistic, f_num_df, f_den_df, lower.tail = FALSE) # P-value
  
  # Extract the coefficient and its p-value for the trait
  coef_summary <- coef(model_summary)
  trait_coef <- coef_summary[2, "Estimate"]  # Assumes the trait is the second predictor
  trait_p_value <- coef_summary[2, "Pr(>|t|)"]
  
  # Combine the statistics into a data frame
  result <- data.frame(
    Trait = trait_name,
    Multiple_R_Squared = multiple_r_squared,
    Adjusted_R_Squared = adjusted_r_squared,
    F_Statistic = f_statistic,
    P_Value = p_value,
    Coefficient = trait_coef,
    Coefficient_P_Value = trait_p_value
  )
  
  # Append the result to the results list
  results_list[[i - 3]] <- result
}

# Combine all results into a single data frame
results_df_0 <- do.call(rbind, results_list)

# Save the results to a CSV file
write.csv(results_df, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/regression_summary_all_traits.csv", row.names = FALSE)

Summary statistics for all Traits’ GEBVs

Trait Multiple_R_Squared Adjusted_R_Squared F_Statistic P_Value Coefficient Coefficient_P_Value
BMR 0.0400473 0.0363266 10.7632421 0.0011781 21.1128882 0.0011781
CO 0.0280949 0.0243278 7.4580136 0.0067510 -13.2769834 0.0067510
MSL 0.0198445 0.0160455 5.2235464 0.0230946 -16.9454262 0.0230946
CTFS 0.0185227 0.0147186 4.8690531 0.0282237 15.8469990 0.0282237
UT 0.0166425 0.0128310 4.3664218 0.0376334 -15.6741466 0.0376334
MS 0.0128559 0.0090298 3.3600230 0.0679494 -14.7558389 0.0679494
CK 0.0106156 0.0067808 2.7682148 0.0973674 14.6866464 0.0973674
IH 0.0101222 0.0062855 2.6382359 0.1055404 -7.3062481 0.1055404
DD 0.0100161 0.0061790 2.6103042 0.1073935 -9.5352332 0.1073935
CONF 0.0095452 0.0057062 2.4863924 0.1160600 -13.9681677 0.1160600
UD 0.0085295 0.0046866 2.2195470 0.1374945 -15.7518165 0.1374945
SU 0.0077590 0.0039131 2.0174678 0.1567056 7.3716271 0.1567056
MT 0.0076274 0.0037810 1.9829901 0.1602796 -10.6502458 0.1602796
LP 0.0074242 0.0035770 1.9297608 0.1659824 9.2112199 0.1659824
MET 0.0070642 0.0032156 1.8355367 0.1766601 -7.6934858 0.1766601
MR 0.0066755 0.0028254 1.7338637 0.1890867 -7.8752018 0.1890867
DO 0.0060791 0.0022267 1.5779921 0.2101864 9.5682702 0.2101864
DHL 0.0052895 0.0014340 1.3719419 0.2425591 -9.4618477 0.2425591
MOB 0.0047368 0.0008792 1.2279220 0.2688435 7.8139790 0.2688435
DF 0.0046402 0.0007822 1.2027541 0.2737945 7.8603320 0.2737945
FAT 0.0046204 0.0007623 1.1975980 0.2748229 1.0090961 0.2748229
SH 0.0043235 0.0004643 1.1203076 0.2908422 5.2472606 0.2908422
DS 0.0043121 0.0004528 1.1173348 0.2914818 -7.5550660 0.2914818
FOOT 0.0043005 0.0004412 1.1143288 0.2921304 26.7441735 0.2921304
FA 0.0042762 0.0004168 1.1079850 0.2935052 -8.0817790 0.2935052
PROT 0.0040026 0.0001421 1.0368190 0.3095163 1.2915168 0.3095163
WL 0.0038262 -0.0000350 0.9909414 0.3204451 5.2197508 0.3204451
MDR 0.0038096 -0.0000516 0.9866366 0.3214965 7.0545996 0.3214965
CA 0.0036379 -0.0002239 0.9420177 0.3326685 7.4603427 0.3326685
ME 0.0034176 -0.0004452 0.8847561 0.3477821 -5.0950658 0.3477821
SCS 0.0031318 -0.0007321 0.8105371 0.3688011 -4.8756313 0.3688011
FE 0.0028874 -0.0009774 0.7470985 0.3881993 -5.1774356 0.3881993
FTP 0.0027874 -0.0010778 0.7211475 0.3965550 9.8351267 0.3965550
AFS 0.0025212 -0.0013450 0.6521188 0.4201001 -5.9185799 0.4201001
UF 0.0018686 -0.0020001 0.4830008 0.4876916 6.8229142 0.4876916
ST 0.0018050 -0.0020640 0.4665276 0.4952019 -7.1228081 0.4952019
MSP 0.0014890 -0.0023812 0.3847251 0.5356327 -3.5290172 0.5356327
HL 0.0013205 -0.0025503 0.3411423 0.5596809 -4.3615962 0.5596809
SCK 0.0011411 -0.0027305 0.2947310 0.5876733 3.3934702 0.5876733
TL 0.0009008 -0.0029717 0.2326179 0.6299983 -5.0172076 0.6299983
LOC 0.0008542 -0.0030185 0.2205721 0.6390011 -3.5628830 0.6390011
DA 0.0006826 -0.0031907 0.1762328 0.6749803 3.5280623 0.6749803
CW 0.0005859 -0.0032878 0.1512479 0.6976664 -2.7721164 0.6976664
BCS 0.0003594 -0.0035151 0.0927672 0.7609338 2.1412729 0.7609338
RUM 0.0003149 -0.0035598 0.0812707 0.7758114 -1.6952340 0.7758114
TU 0.0002204 -0.0036547 0.0568793 0.8116875 1.0400512 0.8116875
FL 0.0001793 -0.0036960 0.0462618 0.8298705 1.5296709 0.8298705
BQ 0.0001697 -0.0037056 0.0437957 0.8343993 -1.5434870 0.8343993
HHE 0.0000521 -0.0038236 0.0134478 0.9077707 0.6475671 0.9077707
MILK 0.0000520 -0.0038237 0.0134257 0.9078463 0.0042447 0.9078463
HH 0.0000509 -0.0038249 0.0131340 0.9088482 -0.6606061 0.9088482
BD 0.0000293 -0.0038466 0.0075535 0.9308097 -0.6433874 0.9308097
DCA 0.0000046 -0.0038713 0.0011932 0.9724707 -0.2685932 0.9724707
FEED 0.0000001 -0.0038759 0.0000152 0.9968903 -0.0190613 0.9968903
Five out 54 traits GEBVs presented significant correlations with Cortisol:
  • BMR = Body Maintenance Requirements
  • CO = Cystic ovaries
  • MSL = Median Suspensory Ligament
  • CTFS = Calving to First Service
  • UT = Udder Texture

29.2.3 Adding BIRTH_YEAR

29.2.3.1 Model Description

The regression model added the BY is shown bellow:

\[ y = \beta_0 + \beta_1 \times GEBV_{\text{Trait}} + BIRTH\_YEAR + \epsilon \]

Where:

  • \(y\) represents Cortisol phenotypes.
  • \(GEBV_{\text{Trait}}\) denotes the GEBV for the specific trait (e.g., Milk Yield).
  • \(BIRTH\_YEAR\) is the birth year of the subjects, included as a factor.
  • \(\beta_0\) and \(\beta_1\) are the intercept and regression coefficient, respectively.
  • \(\epsilon\) represents the error term capturing unexplained variability.

The BIRTH_YEAR variable is converted to a factor to account for the categorical nature of birth years.

# Convert BIRTH_YEAR to a factor and rename
data$BIRTH_YEAR <- as.factor(data$BIRTH_YEAR)

# Initialize a list to store the results
results_list <- list()

# Loop through columns 3 to ncol(data) for the GEBVs
for (i in 4:ncol(data)) {
  trait_name <- colnames(data)[i]
  
  # Fit the linear regression model with BIRTH_YEAR as an additional predictor
  model <- lm(data[[2]] ~ data[[i]] + BIRTH_YEAR, data = data)
  
  # Summarize the model
  model_summary <- summary(model)
  
  # Extract the desired statistics
  multiple_r_squared <- model_summary$r.squared
  adjusted_r_squared <- model_summary$adj.r.squared
  f_statistic <- model_summary$fstatistic[1] # F-statistic value
  f_num_df <- model_summary$fstatistic[2] # Numerator degrees of freedom
  f_den_df <- model_summary$fstatistic[3] # Denominator degrees of freedom
  p_value <- pf(f_statistic, f_num_df, f_den_df, lower.tail = FALSE) # P-value
  
  # Extract the coefficient and its p-value for the trait
  coef_summary <- coef(model_summary)
  trait_coef <- coef_summary[2, "Estimate"]  # Assumes the trait is the second predictor
  trait_p_value <- coef_summary[2, "Pr(>|t|)"]
  
  # Combine the statistics into a data frame
  result <- data.frame(
    Trait = trait_name,
    Multiple_R_Squared = multiple_r_squared,
    Adjusted_R_Squared = adjusted_r_squared,
    F_Statistic = f_statistic,
    P_Value = p_value,
    Coefficient = trait_coef,
    Coefficient_P_Value = trait_p_value
  )
  
  # Append the result to the results list
  results_list[[i - 2]] <- result
}

# Combine all results into a single data frame
results_df <- do.call(rbind, results_list)

# Save the results to a CSV file
write.csv(results_df, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY/regression_summary_all_traits_BY.csv", row.names = FALSE)

Summary statistics for all Traits’ GEBVs adding BIRTY_YEAR

Trait Multiple_R_Squared Adjusted_R_Squared F_Statistic P_Value Coefficient Coefficient_P_Value
BMR 0.0881915 0.0738886 6.165997 0.0000945 19.1527245 0.0028131
CO 0.0803421 0.0659161 5.569252 0.0002590 -12.5790675 0.0094097
MSL 0.0734487 0.0589145 5.053527 0.0006186 -16.2613977 0.0277614
IH 0.0719150 0.0573568 4.939831 0.0007494 -9.3449051 0.0354801
CTFS 0.0705440 0.0559642 4.838504 0.0008891 14.3231800 0.0442704
UT 0.0704128 0.0558310 4.828827 0.0009037 -14.8606501 0.0452229
MS 0.0686230 0.0540132 4.697043 0.0011284 -14.8462730 0.0606040
CK 0.0670600 0.0524256 4.582365 0.0013689 15.4450943 0.0785803
PROT 0.0658116 0.0511577 4.491053 0.0015963 2.1108214 0.0970292
DD 0.0638377 0.0491528 4.347166 0.0020332 -8.6326827 0.1365405
FAT 0.0628176 0.0481167 4.273045 0.0023028 1.2796392 0.1637385
SU 0.0623970 0.0476895 4.242529 0.0024239 7.0165690 0.1767005
CONF 0.0621537 0.0474424 4.224892 0.0024968 -11.6231634 0.1847326
MET 0.0618359 0.0471196 4.201861 0.0025952 -7.2376432 0.1958719
MR 0.0617517 0.0470341 4.195767 0.0026218 -7.5588944 0.1989504
FOOT 0.0614269 0.0467042 4.172251 0.0027273 31.1509094 0.2113774
MOB 0.0613973 0.0466741 4.170111 0.0027372 8.6217376 0.2125534
LP 0.0613481 0.0466241 4.166549 0.0027536 8.0864569 0.2145285
FTP 0.0608284 0.0460963 4.128971 0.0029327 13.5010333 0.2367661
DO 0.0607904 0.0460577 4.126220 0.0029463 8.8960747 0.2385010
ME 0.0607825 0.0460496 4.125648 0.0029491 -6.2944604 0.2388636
UD 0.0606216 0.0458862 4.114024 0.0030071 -12.1091142 0.2463830
FA 0.0605549 0.0458185 4.109209 0.0030315 -8.6648464 0.2495819
MT 0.0601784 0.0454361 4.082026 0.0031728 -8.2421107 0.2686411
CA 0.0599002 0.0451535 4.061948 0.0032814 8.3011337 0.2838987
DF 0.0598369 0.0450892 4.057381 0.0033066 7.4854663 0.2875213
DHL 0.0595211 0.0447684 4.034611 0.0034352 -8.1319072 0.3064961
SH 0.0586677 0.0439017 3.973163 0.0038074 4.4345159 0.3666760
MILK 0.0585384 0.0437704 3.963861 0.0038671 0.0328321 0.3771595
MDR 0.0584166 0.0436466 3.955101 0.0039243 6.0467308 0.3874240
ST 0.0578881 0.0431099 3.917120 0.0041817 -7.9781463 0.4369789
SCS 0.0576964 0.0429152 3.903356 0.0042790 -3.9614430 0.4573264
AFS 0.0575918 0.0428089 3.895845 0.0043331 -5.2064660 0.4690656
MSP 0.0575087 0.0427245 3.889880 0.0043765 -3.9533312 0.4787399
DS 0.0573283 0.0425413 3.876936 0.0044723 -4.7841746 0.5009026
WL 0.0572605 0.0424724 3.872073 0.0045088 3.4479700 0.5096796
TL 0.0572547 0.0424665 3.871655 0.0045119 -6.7563254 0.5104479
FE 0.0572054 0.0424165 3.868123 0.0045386 -3.8209975 0.5170084
DA 0.0566761 0.0418789 3.830184 0.0048356 4.3714958 0.5986609
HHE 0.0565353 0.0417358 3.820095 0.0049178 2.6941677 0.6249126
HL 0.0563327 0.0415301 3.805592 0.0050384 -3.1669335 0.6676242
LOC 0.0562600 0.0414563 3.800388 0.0050823 -3.0195770 0.6847865
FL 0.0562527 0.0414489 3.799865 0.0050868 2.8115963 0.6865743
SCK 0.0560192 0.0412117 3.783153 0.0052307 1.9379506 0.7520160
CW 0.0559471 0.0411384 3.777995 0.0052759 -1.9950607 0.7767448
FEED 0.0559443 0.0411356 3.777797 0.0052777 -1.3559978 0.7777588
BQ 0.0559195 0.0411104 3.776021 0.0052933 -1.9570851 0.7870649
TU 0.0558834 0.0410737 3.773438 0.0053162 1.0731908 0.8014578
UF 0.0558449 0.0410347 3.770689 0.0053406 2.2354069 0.8181403
DCA 0.0557115 0.0408991 3.761148 0.0054263 -1.0069789 0.8965483
BD 0.0557011 0.0408885 3.760401 0.0054331 0.8702554 0.9055126
BCS 0.0556786 0.0408657 3.758795 0.0054477 0.6193741 0.9285710
RUM 0.0556595 0.0408464 3.757433 0.0054601 -0.3154634 0.9570449
HH 0.0556561 0.0408428 3.757185 0.0054623 -0.2539972 0.9646162
Fitting Birth_Year to the model made one more trait significativally correlated with cortisol and enhanced the proportio of variation explained by the model:
  • BMR = Body Maintenance Requirements
  • CO = Cystic ovaries
  • MSL = Median Suspensory Ligament
  • CTFS = Calving to First Service
  • UT = Udder Texture
  • IH = Interdigital Hyperplasia

Below we can check the improvement in the percentage of variation in Cortisol explained by the Trait_GEBVs + Birth_Year.

CORR_BY <- read.csv("/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY/regression_summary_all_traits_BY.csv")

CORR_MIN <- read.csv("/home/bambrozi/2_CORTISOL/Correlation/Results/regression_summary_all_traits.csv")

CORR_BY <- CORR_BY[, c("Trait", "Adjusted_R_Squared"), drop = FALSE]

CORR_MIN <- CORR_MIN[, c("Trait", "Adjusted_R_Squared"), drop = FALSE]

colnames(CORR_BY)[colnames(CORR_BY) == "Adjusted_R_Squared"] <- "Adjusted_R_Squared_BY"

colnames(CORR_MIN)[colnames(CORR_MIN) == "Adjusted_R_Squared"] <- "Adjusted_R_Squared_MIN"

Corr_R2_comp <- merge(CORR_MIN, CORR_BY, by.x = "Trait", by.y = "Trait")

Corr_R2_comp$Absolute_Change <- Corr_R2_comp$Adjusted_R_Squared_BY - Corr_R2_comp$Adjusted_R_Squared_MIN

write.csv(Corr_R2_comp, "/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY/absolute_improv_r2.csv")
X Trait Adjusted_R_Squared_Model_MIN Adjusted_R_Squared_MIN.BY Absolute_Change
1 AFS -0.0013450 0.0428089 0.0441539
2 BCS -0.0035151 0.0408657 0.0443809
3 BD -0.0038466 0.0408885 0.0447351
4 BMR 0.0363266 0.0738886 0.0375621
5 BQ -0.0037056 0.0411104 0.0448160
6 CA -0.0002239 0.0451535 0.0453774
7 CK 0.0067808 0.0524256 0.0456448
8 CO 0.0243278 0.0659161 0.0415882
9 CONF 0.0057062 0.0474424 0.0417362
10 CTFS 0.0147186 0.0559642 0.0412457
11 CW -0.0032878 0.0411384 0.0444262
12 DA -0.0031907 0.0418789 0.0450696
13 DCA -0.0038713 0.0408991 0.0447705
14 DD 0.0061790 0.0491528 0.0429738
15 DF 0.0007822 0.0450892 0.0443070
16 DHL 0.0014340 0.0447684 0.0433344
17 DO 0.0022267 0.0460577 0.0438310
18 DS 0.0004528 0.0425413 0.0420884
19 FA 0.0004168 0.0458185 0.0454018
20 FAT 0.0007623 0.0481167 0.0473544
21 FE -0.0009774 0.0424165 0.0433939
22 FEED -0.0038759 0.0411356 0.0450115
23 FL -0.0036960 0.0414489 0.0451449
24 FOOT 0.0004412 0.0467042 0.0462629
25 FTP -0.0010778 0.0460963 0.0471741
26 HH -0.0038249 0.0408428 0.0446677
27 HHE -0.0038236 0.0417358 0.0455595
28 HL -0.0025503 0.0415301 0.0440805
29 IH 0.0062855 0.0573568 0.0510713
30 LOC -0.0030185 0.0414563 0.0444747
31 LP 0.0035770 0.0466241 0.0430471
32 MDR -0.0000516 0.0436466 0.0436982
33 ME -0.0004452 0.0460496 0.0464948
34 MET 0.0032156 0.0471196 0.0439039
35 MILK -0.0038237 0.0437704 0.0475941
36 MOB 0.0008792 0.0466741 0.0457949
37 MR 0.0028254 0.0470341 0.0442086
38 MS 0.0090298 0.0540132 0.0449834
39 MSL 0.0160455 0.0589145 0.0428690
40 MSP -0.0023812 0.0427245 0.0451057
41 MT 0.0037810 0.0454361 0.0416552
42 PROT 0.0001421 0.0511577 0.0510155
43 RUM -0.0035598 0.0408464 0.0444062
44 SCK -0.0027305 0.0412117 0.0439421
45 SCS -0.0007321 0.0429152 0.0436472
46 SH 0.0004643 0.0439017 0.0434374
47 ST -0.0020640 0.0431099 0.0451738
48 SU 0.0039131 0.0476895 0.0437764
49 TL -0.0029717 0.0424665 0.0454382
50 TU -0.0036547 0.0410737 0.0447284
51 UD 0.0046866 0.0458862 0.0411996
52 UF -0.0020001 0.0410347 0.0430348
53 UT 0.0128310 0.0558310 0.0430000
54 WL -0.0000350 0.0424724 0.0425074

Adjusted_R_Squared_Model_MIN = Model without Birth Year
Adjusted_R_Squared_MIN.BY = Model adding Birth Year

29.2.4 Adding BIRTH_YEAR and SAMPLING DATE

The regression model added the BY and SAMPLING DATE is shown bellow:

\[ y = \beta_0 + \beta_1 \times GEBV_{\text{Trait}} + BIRTH\_YEAR + SAMPLING\_DATE + \epsilon \]

Where:

  • \(y\) represents Cortisol phenotypes.
  • \(GEBV_{\text{Trait}}\) denotes the GEBV for the specific trait (e.g., Milk Yield).
  • \(BIRTH\_YEAR\) is the birth year of the subjects, included as a factor.
  • \(SAMPLING\_DATE\) is the cortisol sampling date for the subjects, included as a factor.
  • \(\beta_0\) and \(\beta_1\) are the intercept and regression coefficient, respectively.
  • \(\epsilon\) represents the error term capturing unexplained variability.

The SAMPLING_DATE variable is also converted to a factor to account for the categorical nature of sampling date.

# Convert BIRTH_YEAR to a factor and rename
data_final$BIRTH_YEAR <- as.factor(data_final$BIRTH_YEAR)

# Convert Sampling_data to a factor and rename
data_final$Sampling_date <- as.factor(data_final$Sampling_date)

# Initialize a list to store the results
results_list <- list()

# Loop through columns 3 to ncol(data) for the GEBVs
for (i in 5:ncol(data_final)) {
  trait_name <- colnames(data_final)[i]
  
  # Fit the linear regression model with BIRTH_YEAR as an additional predictor
  model <- lm(data_final[[2]] ~ data_final[[i]] + data_final$BIRTH_YEAR + data_final$Sampling_date , data = data_final)
  
  # Summarize the model
  model_summary <- summary(model)
  
  # Extract the desired statistics
  multiple_r_squared <- model_summary$r.squared
  adjusted_r_squared <- model_summary$adj.r.squared
  f_statistic <- model_summary$fstatistic[1] # F-statistic value
  f_num_df <- model_summary$fstatistic[2] # Numerator degrees of freedom
  f_den_df <- model_summary$fstatistic[3] # Denominator degrees of freedom
  p_value <- pf(f_statistic, f_num_df, f_den_df, lower.tail = FALSE) # P-value
  
  # Extract the coefficient and its p-value for the trait
  coef_summary <- coef(model_summary)
  trait_coef <- coef_summary[2, "Estimate"]  # Assumes the trait is the second predictor
  trait_p_value <- coef_summary[2, "Pr(>|t|)"]
  
  # Combine the statistics into a data frame
  result <- data.frame(
    Trait = trait_name,
    Multiple_R_Squared = multiple_r_squared,
    Adjusted_R_Squared = adjusted_r_squared,
    F_Statistic = f_statistic,
    P_Value = p_value,
    Coefficient = trait_coef,
    Coefficient_P_Value = trait_p_value
  )
  
  # Append the result to the results list
  results_list[[i - 2]] <- result
}

# Combine all results into a single data frame
results_df <- do.call(rbind, results_list)

# Save the results to a CSV file
write.csv(results_df, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY/regression_summary_all_traits_BY.csv", row.names = FALSE)

Summary statistics for all Traits’ GEBVs adding BIRTY_YEAR and SAMPLING_DATE

Trait Multiple_R_Squared Adjusted_R_Squared F_Statistic P_Value Coefficient Coefficient_P_Value
CO 0.3913617 0.3257757 5.967151 0 -11.1849879 0.0099861
BMR 0.3899307 0.3241904 5.931386 0 14.4612444 0.0135665
LP 0.3858330 0.3196512 5.829896 0 12.2619950 0.0330355
MILK 0.3850052 0.3187343 5.809559 0 0.0671160 0.0396652
PROT 0.3841834 0.3178238 5.789422 0 2.2166395 0.0476301
UT 0.3822859 0.3157218 5.743130 0 -12.1238483 0.0731558
CK 0.3801906 0.3134008 5.692345 0 12.7333709 0.1192726
HHE 0.3795579 0.3126999 5.677076 0 7.4003505 0.1388525
MSP 0.3793006 0.3124149 5.670876 0 -7.2388952 0.1478152
DA 0.3791391 0.3122360 5.666989 0 10.8037510 0.1537699
TU 0.3785722 0.3116080 5.653351 0 -5.2064639 0.1769405
MSL 0.3777098 0.3106526 5.632656 0 -8.6230634 0.2203509
BQ 0.3775224 0.3104450 5.628166 0 -7.7284891 0.2313766
IH 0.3772456 0.3101385 5.621542 0 -4.7354916 0.2488963
ST 0.3767980 0.3096426 5.610839 0 -9.9721644 0.2808121
LOC 0.3766890 0.3095218 5.608233 0 -7.2367609 0.2893509
FAT 0.3765584 0.3093772 5.605116 0 0.8520581 0.3000062
UD 0.3763515 0.3091480 5.600177 0 -9.3297651 0.3179533
CA 0.3757417 0.3084725 5.585641 0 6.0756898 0.3798799
MS 0.3755038 0.3082090 5.579979 0 -6.0375749 0.4085912
SCK 0.3754363 0.3081342 5.578372 0 -4.4161473 0.4173165
SCS 0.3754033 0.3080976 5.577587 0 3.9080681 0.4216769
IDD 0.3752677 0.3079474 5.574362 0 3.7029601 0.4403519
FOOT 0.3751830 0.3078536 5.572349 0 17.3309360 0.4526548
TL 0.3750803 0.3077398 5.569908 0 -6.6585204 0.4683140
MET 0.3749055 0.3075462 5.565755 0 -3.4014191 0.4970656
CTFS 0.3747600 0.3073850 5.562300 0 4.0557716 0.5233387
CONF 0.3746137 0.3072229 5.558828 0 -4.7896643 0.5523352
FE 0.3745078 0.3071056 5.556316 0 -2.9375586 0.5752627
HL 0.3743988 0.3069849 5.553731 0 3.4274416 0.6009102
FA 0.3742870 0.3068610 5.551080 0 -3.2821641 0.6298652
DD 0.3742836 0.3068573 5.551001 0 2.5402141 0.6307730
MOB 0.3742556 0.3068263 5.550337 0 3.0592146 0.6385442
FTP 0.3742359 0.3068045 5.549870 0 4.8583743 0.6441419
BCS 0.3741681 0.3067293 5.548263 0 2.6692352 0.6643615
HH 0.3740753 0.3066266 5.546066 0 2.0230249 0.6947785
DF 0.3740392 0.3065865 5.545209 0 2.3681806 0.7076996
FL 0.3739824 0.3065237 5.543865 0 -2.1711035 0.7294613
UF 0.3739605 0.3064994 5.543347 0 2.8880683 0.7384413
MDR 0.3738855 0.3064163 5.541571 0 1.8495373 0.7722558
WL 0.3738548 0.3063822 5.540843 0 1.2643028 0.7878801
AFS 0.3738293 0.3063540 5.540239 0 1.5814380 0.8018670
RUM 0.3738023 0.3063241 5.539601 0 -1.2574561 0.8179113
SH 0.3738015 0.3063233 5.539583 0 1.0342843 0.8184040
BD 0.3736958 0.3062061 5.537080 0 0.7613199 0.9071017
MR 0.3736957 0.3062060 5.537078 0 0.6291752 0.9071898
SU 0.3736871 0.3061965 5.536876 0 0.4745538 0.9186634
FEED 0.3736818 0.3061907 5.536751 0 0.3944746 0.9266753
DHL 0.3736774 0.3061858 5.536647 0 -0.5919776 0.9340753
DO 0.3736757 0.3061838 5.536604 0 0.5220723 0.9373261
DS 0.3736695 0.3061769 5.536458 0 0.3979132 0.9502668
CW 0.3736676 0.3061749 5.536414 0 0.3562368 0.9547808
ME 0.3736658 0.3061729 5.536370 0 -0.2401976 0.9599012
MT 0.3736595 0.3061659 5.536223 0 -0.0976386 0.9882642
DCA 0.3736593 0.3061657 5.536217 0 0.0802555 0.9906432
Fitting Birth_Year and Sampling_date to the model these are the traits with significant correlation:
  • CO = Cystic ovaries
  • BMR = Body Maintenance Requirements
  • LP = Lactation persistency
  • MILK = Milk yield
  • PROT = Protein yield
  • UT = Udder Texture

30 Heritability estimation - BLUPF90

30.1 Files preparation

Preparing files to run Variance components estimation using REML with AI (Average Information) algorithm.

First you need to create a directory in your home directory, prepare and save the following files in:

  • Phenotype and Fixed effects file
  • Pedigree file
  • Genotype file
  • BlupF90+ executable file
  • RenumF90 executable file
  • Parameter file

      30.1.1 Phenotype and Fixed effects file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID SECOND COLUMN = Phenotype THIRD COLUMN = Fixed Effect 1 FOURTH COLUMN = Fixed Effect 2

      To get in one file these four columns we need the following code:

      #File with equivalence among different ids
      eq_ids <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv")
      
      # Genotype file with cid
      geno <- read.table("/home/bambrozi/2_CORTISOL/Geno_files/genoplink.ped")
      
      # Phenotipic file and fixed effects
      data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv")
      
      # creating a pheno file with only ID, Cortisol, BY and Sam date columns
      pheno <- data_final %>%
        select(ID, T4Cortisol, BIRTH_YEAR, Sampling_date)
      
      # Create a new column iid and and bring the iid from eq_ids to geno file
      pheno$iid <- eq_ids$iid[match(pheno$ID, eq_ids$elora_id)]
      
      # organizing columns sequence and keep only iid
      pheno <- pheno%>%
        select(iid, T4Cortisol, BIRTH_YEAR, Sampling_date)
      
      # Create a new column geno$iid, and bring the iid from eq_ids to geno file
      geno$iid <- eq_ids$iid[match(geno$V2, eq_ids$cdn_id)]
      
      # organizing the columns sequence
      library(dplyr)
      geno <- geno %>%
        select(V1, V2, iid, everything())
      
      # Keeping in the pheno file only the rows present also in geno file
      pheno <- pheno %>%
        filter(iid %in% geno$iid)
      
      write.table(pheno, "/home/bambrozi/2_CORTISOL/Heritability_BLUPF90/pheno_fix_eff.txt", sep = " ", col.names = FALSE, row.names = FALSE, quote = FALSE)

      The file should be saved as text file, with separation by space and no columns names.

      30.1.2 Pedigree file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID SECOND COLUMN = Sire ID THIRD COLUMN = Dam ID

      The file should be saved as text file, with separation by space and no columns names.

      We used the code below to remove the commas of a .csv file to a file with sepation by spaces.

      # to replace comma for space in the .csv file with the equivalence among IDs
      sed -i 's/,/ /g' bruno_ids.csv

      30.1.3 Genotype file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID SECOND COLUMN = Genotypes (0, 1 and 2 format)

      The file should be saved as text file, with separation by space and no columns names.

      We used the code below to replace the cid for iid. First we merge using the second column of the firs file, and the first column of the second file. Then we use again the command awk to keep only the third and fifth columsn and sabe in a different object.

      # Using the awk function to merge the two files and the second awk to select only the 3rd and 5fh columns
      awk 'FNR==NR {a[$2]=$0; next} {print a[$1], $0}' bruno_ids.csv bruno_gntps.txt | awk '{print$3,$5}' > bruno_gntps_iid

      30.1.4 Download the executable files

      Download from this website https://nce.ads.uga.edu/html/projects/programs/Linux/64bit/:
      • BlupF90+
      • renumF90

      30.1.5 Parameter file

      The appearance of this file is like this:

      My Image

      • DATAFILE: bellow this line you need to inform the name of the file with phenotype and fixed effects. As before running BLUPF90 on server you are going to direct the terminal to the directory where all these files are placed you only need to inform the name.
      • TRAITS: below this line you need to inform which column are the phenotype date in the previous file, in this example, 2.
      • FIELDS_PASSED TO OUTPUT:
      • WEIGHT (S):
      • RESIDUAL VARIANCE: for the firs run you need to inform the value of 1.0, for the second you can pick the variance from the firs run’s output.
      • EFFECT: you will inform your first effect, in this example, Birth Year, which is in the column 3, and the word cross numer because is a number.
      • EFFECT: you should provide the next effect, in this example, sample date, as sample date has one non numeric character you should inform as cross alpha, in this example column 4.
      • EFFECT: now I’m providing my animal ID information, in this example column 1, and again cross alpha because has number and letters in the ID. I’m also informing that this effect is RANDOM, and that is my animal effect.
      • FILE: bellow this line I need to provide the pedigree file. Again, as I’m already in the directory which contain the pedigree file I only need to provide the file name.
      • FILE-POS: Here I’ll inform which columns should be considered in the pedigree file, in this situation, 1 2 3 0 0.
      • PED_DEPTH: Now we can inform the depth we want the software considers the pedigree, or if we leave 0 it will the maximum possible.
      • (CO) VARIANCES: Here you should provide the Variance/Co-variance matrix, like as for residual variance in the first run we set up to 0 in this example that we don´t have to imagine any co-variance, but if you know that exist variance among you effects you shoul set up XXX for ….
      • OPTION method: VCE (Variance Component Estimation).
      • OPTION OrigID: this will keep the original ID informed.
      • OPTION missing 9999: you are informing that missing values will appear as 9999
      • OPTION se_covar_function: H2_1 g_3_3_1_1/(g_3_3_1_1+r_1_1)
        • H2_1: the name that your function will appear on the output files.
        • g_3_3: you are asking for genetic variance estimation for the 3rd informed effect.
        • **_1_1**: this effect is in the 1st column.
        • /(g_3_3_1_1+r_1_1): to get the total phenotipic variance, you are summing to genetic variance the residual variance of the effect in column 1.

30.2 Running renumF90 and BlupF90+

  1. Go to the server you wanna run this analysis, for instance, grand

  2. Now go to the directory you have created to run this analysis where that set of files are placed.

ssh grand
  1. Make the renumF90 and BlupF90+ files executables
chmod +x renumf90
chmod +x blupf90+
  1. Run renumF90
./renumf90

When you run the code above, it will as you the name of your parameter card.

renumF90 will generate a new parameter card called renf90.par

  1. Run blupf90+
./blupf90+

blupf90+ will ask you for parameter’s card name, now you should provide with the new one renf90.par

blupF90+ will generate the blupf90.log file with the results.

My Image

Now you should update you renf90.par file with these informations from the .log file

Copy Residual Variance from blupf90.log and will paste on renf90.par RANDOM_RESIDUAL_VALUES Copy Genetic variance for effect x from blupf90.log and will paste on renf90.par (CO) VARIANCE

  1. 2nd blupf90+ run
./blupf90+

blupf90+ will ask you for parameter’s card name, now you should provide with the UPDATED renf90.par

If the Residual Variance and Genetic variance for effect x didn’t change in your blupf90.log the analysis ended, but if this value vary, you should update again the renf90.par and run again blupf90+ until this values don’t change more.

30.3 Running renumF90 and BlupF90+ adding GENOTYPES

The previous analysis considered only the pedigree, but now we can insert the genotype information. To perform this you need a new diretory called Blup_Genomic inside your previously created directory.

Now you need add the reference for your genotype file in your previous parameter file renum.par and save in this new sub-directory.

The highlighted text show the added part.

My Image

Go to the sub-diretory

Note that as you are in the subdirectory, but your phenotype and fixed effect, pedigree and genotype files are still in the previous directory you need to add the highlighted part to inform the correct location

My Image

To run the renumf90 and blupf90+ you also need to add ../ to correct specify the location. Run renumF90

./../renumf90

Run blupf90+

./../blupf90+

The steps for run, update parameter card, re-run are the same.

30.4 Results

We have 2 different output files

  1. Variance components: blupf90.log

My Image

In this file we can find the heritabilit (SD) and for instance the convergence (similarity)

  1. Solutions: solutions.orig
    In this file we will find the solutions (results) for each effect

In our example:

  • EFFECT 1: Birth Year, has 4 levels (2018, 2019, 2020 and 2021), and the solution that for this fixed effect is how much each level add.
  • EFFECT 2: Sampling date, has 23 levels, and the solutions
  • EFFECT 3: Animal random effect, has one for each animal and it is the EBV or GEBV.

    My Image

31 Storage 1

32 GWAS - EXTREME PHENO - WITH BY and SD

32.1 Data preparation

I received from Umesh a e-mail informing the three categories that the animals could be sorted based on their cortisol concentration.

My Image

pheno <- read.table("/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", header = T)
data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv", header = T)
ids_eq <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv", header = T)


# Create an matrix with fixed effects with only those animals which also have phenotype and genotype
data_final$cdn_id <- ids_eq$cdn_id[match(data_final$ID, ids_eq$elora_id)]
fixeff <- data_final[,c("ID", "BIRTH_YEAR", "Sampling_date", "cdn_id")]
fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id, ]
fixeff$FID <- "HO"
fixeff <- fixeff[, c("FID", "cdn_id", "BIRTH_YEAR", "Sampling_date")]

# Now we are gona remove the intermediary animals from pheno object
pheno$Categorical <- ifelse(pheno$cortisol >= 956, "H", 
                           ifelse(pheno$cortisol <= 190.8, "L", "M"))
table(pheno$Categorical)
pheno <- pheno[pheno$Categorical != "M", ]
pheno <- pheno[, c("FID", "cdn_id", "cortisol")]

# Now we are going to remove from fixeff the animals which are not in pheno
fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id,]

#Checking if match the animals and order
identical(fixeff$cdn_id, pheno$cdn_id)

#Creating a file with animals to keep in the genotype file, we will use it on Plink
to_keep_geno <- pheno[, c("FID", "cdn_id")]

write.table(fixeff, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/fixeff.txt", quote = F, row.names = F, col.names = T)
write.table(pheno, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/pheno.txt", quote = F, row.names = F, col.names = T)
write.table(to_keep_geno, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/to_keep_geno.txt", quote = F, row.names = F, col.names = F)

We ended up with
H (Hight) = 34 animals
L (Low) = 37 animals Total = 71 animals

On Plink we will remove all individuals from genotype files that are classified as Medium, keeping only the High and Low

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/geno_extreme
KEEP=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/to_keep_geno.txt

plink2 --bfile ${DATA} --keep ${KEEP} --chr-set 29 --make-bed --out ${RESULT}

32.2 GWAS on GCTA

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/geno_extreme
RESULT=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/GWAS_result
PHENO=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/pheno.txt
FIXEFF=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/fixeff.txt

/home/local/bin/gcta \
    --bfile ${DATA} \
    --mlma-loco \
    --pheno ${PHENO} \
    --qcovar ${FIXEFF} \
    --autosome-num 29 \
    --autosome \
    --out ${RESULT}

After ran the GWAS above I got the following message from the GCTA:

Error: analysis stopped because more than half of the variance components are constrained. The result would be unreliable. You may have a try of the option –reml-no-constrain.

Than a ran this new code with NO CONSTRAIN

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/geno_extreme
RESULT=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/GWAS_result_NO_CONSTRAIN
PHENO=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/pheno.txt
FIXEFF=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/fixeff.txt

/home/local/bin/gcta \
    --bfile ${DATA} \
    --reml-no-constrain \
    --mlma-loco \
    --pheno ${PHENO} \
    --qcovar ${FIXEFF} \
    --autosome-num 29 \
    --autosome \
    --out ${RESULT}

32.2.1 Corrected Bonferroni for genome independent segments

Now we are going to apply a correction for multiple testing modifying the Bonferroni test adjusting not for the total number of SNPs but for the number of the independent segments in the genome.

Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")

library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
  filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
  summarise(total_length = sum(`Seq length`)) %>%
  pull(total_length)

# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8

# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)

NeL <- Ne*L_M

# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)


gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/GWAS_result_NO_CONSTRAIN.loco.mlma", 
                  head=T, stringsAsFactors = F)


gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}

bonf<- -log10(0.15/Me)

library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20, 
               cex=1, lwd=1, ylab="", xlab="Chromossomes", 
               main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf), col="red2")
legend("topright", col="red2", lwd=2, c("Bonferroni corr. for ind. segments"), bty="n")


library(tidyverse)

SNP_sig_bonf_ind_seg <- filter(gwas, logP > bonf)
write.table(SNP_sig_bonf_ind_seg, file = "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/SNP_sig15_Bonf_ind_seg.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

write.csv(SNP_sig_bonf_ind_seg, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/SNP_sig15_Bonf_ind_seg.txt")

The SNPs below are the significant after the correction above

X Chr SNP bp A1 A2 Freq b se p logP
1 3 BTB-00148619 105840426 A C 0.321429 410.739 104.530 0.0000852 4.069736
2 3 ARS-BFGL-NGS-118207 110866523 A C 0.421429 402.730 103.634 0.0001019 3.991928
3 22 ARS-BFGL-NGS-23411 5134078 A C 0.214286 415.637 109.100 0.0001391 3.856526

My Image

Bellow we inserted the rsID searched at SNPCHIMP

X Chr rsID SNP bp A1 A2 Freq b se p logP
1 3 rs43357861 BTB-00148619 105840426 A C 0.321429 410.739 104.530 0.0000852 4.069736
2 3 rs110081798 ARS-BFGL-NGS-118207 110866523 A C 0.421429 402.730 103.634 0.0001019 3.991928
3 22 rs110031217 ARS-BFGL-NGS-23411 5134078 A C 0.214286 415.637 109.100 0.0001391 3.856526

32.2.2 GALLO

# GALLO

#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")

#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")

#import MARKER files = the GWAS output
gwas <- read.csv(file = "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/SNP_sig15_Bonf_ind_seg.csv")

# Assuming "gwas" is your dataframe
gwas <- subset(gwas, select = c(Chr, SNP, bp))


colnames(gwas) <- c("CHR","SNP", "BP")


#FINDING GENES AND QTLs ARROUND THE MARKER

#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2, 
                                            marker_file= gwas, 
                                            method = "gene",
                                            marker = "snp", 
                                            interval = 50000, 
                                            nThreads = NULL)

write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_genes_extr_15.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_genes_extr_15.csv")

#FINDING QTLs

out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2, 
                                          marker_file= gwas, 
                                          method = "qtl",
                                          marker = "snp", 
                                          interval = 50000, 
                                          nThreads = NULL)


write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_qtl_extr_15.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_qtl_extr_15.csv")

The GALLO output are bellow:

For GENES

X CHR SNP BP chr start_pos end_pos width strand gene_id gene_name gene_biotype
1 3 BTB-00148619 105840426 3 105807211 105810027 2817 + ENSBTAG00000020622 COL9A2 lncRNA
2 3 BTB-00148619 105840426 3 105824376 105867157 42782 - ENSBTAG00000044178 ZMPSTE24 protein_coding
3 3 BTB-00148619 105840426 3 105884724 105887360 2637 - ENSBTAG00000019698 TMCO2 protein_coding
4 3 ARS-BFGL-NGS-118207 110866523 3 110769753 110827938 58186 + ENSBTAG00000013867 DLGAP3 protein_coding
5 3 ARS-BFGL-NGS-118207 110866523 3 110835276 110839536 4261 + ENSBTAG00000014235 SMIM12 protein_coding
6 3 ARS-BFGL-NGS-118207 110866523 3 110888198 110890904 2707 - ENSBTAG00000013881 GJA4 protein_coding
7 3 ARS-BFGL-NGS-118207 110866523 3 110900183 110905849 5667 - ENSBTAG00000012584 GJB3 protein_coding
8 22 ARS-BFGL-NGS-23411 5134078 22 5091037 5184321 93285 + ENSBTAG00000019832 TGFBR2 protein_coding

FOR QTLs

X CHR SNP BP QTL_type start_pos end_pos QTL_ID
1 3 BTB-00148619 105840426 Production 105835011 105835015 287197
2 3 ARS-BFGL-NGS-118207 110866523 Meat_and_Carcass 110866521 110866525 152332
3 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5109498 5109502 151609
4 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5134076 5134080 151620
5 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5173197 5173201 151601

32.2.2.1 QTL enrichment on GALLO

#QTL enrichment analysis 
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "Name",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)


# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]

write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_enrich_qtl_genome_name_extr_15.csv")

out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "QTL_type",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)

sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_enrich_qtl_genome_type_extr_15.csv")


#Plots

#Name

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")


#Type

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")

QTL Enrichment outcomes

Enrichment by name (enrichment analysis will be performed for each trait individually)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval QTL_type
2 Muscle carnosine content 3 67 5 163224 0.0000000 0.0000000 Meat and Carcass
3 Polyunsaturated fatty acid content 1 28 5 163224 0.0008574 0.0012861 Meat and Carcass
1 Body depth 1 616 5 163224 0.0187281 0.0187281 Production

My Image

Enrichment by QTL_type (enrichment processes performed for the QTL classes)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval
1 Meat and Carcass 4 18258 5 163224 0.0007125 0.0014251
2 Production 1 19640 5 163224 0.4732476 0.4732476

My Image

32.2.3 gPROFILER online

From the online version of GPROFILER i got the following results.

Legend My Image

My Image

My Image

\[ H_3 = \begin{pmatrix} 14 & 15 \\ 16 & 17 \\ 18 & 19 \\ 20 & 21 \end{pmatrix} \]